diff --git a/Python/actuator_line/actuator_line.py b/Python/actuator_line/actuator_line.py
index d0589f402456e8ffe8320ce7f780738aef22fbe4..b8e7efb59673c3b3c9206bfda535bdd1f85e451d 100644
--- a/Python/actuator_line/actuator_line.py
+++ b/Python/actuator_line/actuator_line.py
@@ -104,8 +104,8 @@ length = np.array([6,4,1])*boundary_layer_height
 dx = boundary_layer_height/nodes_per_height
 dt = dx * mach / (np.sqrt(3) * velocity)
 velocity_ratio = dx/dt
-velocity_LB = velocity / velocity_ratio # LB units
-viscosity_LB = viscosity / (velocity_ratio * dx) # LB units
+velocity_LB = velocity / velocity_ratio  # LB units
+viscosity_LB = viscosity / (velocity_ratio * dx)  # LB units
 pressure_gradient = u_star * u_star / boundary_layer_height
 pressure_gradient_LB = pressure_gradient * (dt*dt)/dx
 
@@ -142,7 +142,7 @@ tm_factory.read_config_file(config)
 grid_scaling_factory = gpu.GridScalingFactory()
 grid_scaling_factory.set_scaling_factory(gpu.GridScaling.ScaleCompressible)
 
-grid_builder.add_coarse_grid(0.0, 0.0, 0.0, *length, dx)
+grid_builder.add_coarse_grid(0.0, 0.0, 0.0, length[0], length[1], length[2], dx)
 grid_builder.set_periodic_boundary_condition(not read_precursor, True, False)
 grid_builder.build_grids(False)
 
@@ -162,7 +162,7 @@ bc_factory.set_slip_boundary_condition(gpu.SlipBC.SlipBounceBack)
 bc_factory.set_pressure_boundary_condition(gpu.PressureBC.OutflowNonReflective)
 if read_precursor:
     bc_factory.set_precursor_boundary_condition(gpu.PrecursorBC.DistributionsPrecursor if use_distributions else gpu.PrecursorBC.VelocityPrecursor)
-para.set_outflow_pressure_correction_factor(0.0); 
+para.set_outflow_pressure_correction_factor(0.0)
 #%%
 # don't use python init functions, they are very slow! Just kept as an example.
 # Define lambda in bindings and set it here.
@@ -176,7 +176,7 @@ para.set_outflow_pressure_correction_factor(0.0);
 para.set_initial_condition_perturbed_log_law(u_star, z0, length[0], length[2], boundary_layer_height, velocity_ratio)
 
 #%%
-turb_pos = np.array([3,3,3])*turbine_diameter
+turb_pos = np.array([3, 3, 3])*turbine_diameter
 epsilon = 1.5*dx
 density = 1.225
 level = 0
@@ -185,7 +185,7 @@ n_blade_nodes = 32
 omega = 1
 blade_radii = np.arange(n_blade_nodes, dtype=np.float32)/(0.5*turbine_diameter)
 alm = gpu.ActuatorFarm(n_blades, density, n_blade_nodes, epsilon, level, dt, dx, True)
-alm.add_turbine(turb_pos[0],turb_pos[1],turb_pos[2], turbine_diameter, omega, 0, 0, blade_radii)
+alm.add_turbine(turb_pos[0], turb_pos[1], turb_pos[2], turbine_diameter, omega, 0, 0, blade_radii)
 para.add_actuator(alm)
 #%%
 planar_average_probe = gpu.probes.PlanarAverageProbe("horizontalPlanes", para.get_output_path(), 0, int(t_start_tmp_averaging/dt), int(t_averaging/dt) , int(t_start_out_probe/dt), int(t_out_probe/dt), 'z')
diff --git a/Python/boundary_layer/boundary_layer.py b/Python/boundary_layer/boundary_layer.py
index 25b3cd895f8a3a80f9fd6438e00d3e924fc13779..0ebdbb43894e6bdae55cca1f788a33c3739fb0c6 100644
--- a/Python/boundary_layer/boundary_layer.py
+++ b/Python/boundary_layer/boundary_layer.py
@@ -116,7 +116,7 @@ logger.vf_log_info(f"viscosity [10^8 dx^2/dt] = {viscosity_LB*1e8}")
 logger.vf_log_info(f"u* /(dx/dt) = {u_star*dt/dx}")
 logger.vf_log_info(f"dpdx  = {pressure_gradient}")
 logger.vf_log_info(f"dpdx /(dx/dt^2) = {pressure_gradient_LB}")
-    
+
 #%%
 
 #%%
@@ -136,6 +136,8 @@ para.set_timestep_start_out(int(t_start_out/dt))
 para.set_timestep_out(int(t_out/dt))
 para.set_timestep_end(int(t_end/dt))
 para.set_is_body_force(config.get_bool_value("bodyForce"))
+para.set_devices(np.arange(10))
+para.set_max_dev(communicator.get_number_of_process())
 #%%
 tm_factory = gpu.TurbulenceModelFactory(para)
 tm_factory.read_config_file(config)
@@ -161,13 +163,7 @@ bc_factory.set_pressure_boundary_condition(gpu.PressureBC.OutflowNonReflective)
 bc_factory.set_precursor_boundary_condition(gpu.PrecursorBC.DistributionsPrecursor if use_distributions else gpu.PrecursorBC.VelocityPrecursor)
 para.set_outflow_pressure_correction_factor(0.0); 
 #%%
-def init_func(coord_x, coord_y, coord_z):
-    return [
-        0.0, 
-        (u_star/0.4 * np.log(np.maximum(coord_z,z0)/z0) + 2.0*np.sin(np.pi*16*coord_x/length[0])*np.sin(np.pi*8*coord_z/boundary_layer_height)/(np.square(coord_z/boundary_layer_height)+1))  * dt / dx, 
-        2.0*np.sin(np.pi*16.*coord_x/length[0])*np.sin(np.pi*8.*coord_z/boundary_layer_height)/(np.square(coord_z/boundary_layer_height)+1.)  * dt / dx, 
-        8.0*u_star/0.4*(np.sin(np.pi*8.0*coord_y/boundary_layer_height)*np.sin(np.pi*8.0*coord_z/boundary_layer_height)+np.sin(np.pi*8.0*coord_x/length[0]))/(np.square(length[2]/2.0-coord_z)+1.) * dt / dx]
-para.set_initial_condition(init_func)
+para.set_initial_condition_perturbed_log_law(u_star, z0, length[0], length[2], boundary_layer_height, dx/dx)
 
 #%%
 planar_average_probe = gpu.probes.PlanarAverageProbe("horizontalPlanes", para.get_output_path(), 0, int(t_start_tmp_averaging/dt), int(t_averaging/dt) , int(t_start_out_probe/dt), int(t_out_probe/dt), 'z')
diff --git a/apps/gpu/LBM/ActuatorLine/ActuatorLine.cpp b/apps/gpu/LBM/ActuatorLine/ActuatorLine.cpp
index 368e80a726c1f6e75bd5300ad3de60fd840ba119..9c7394951e795055d22be9b155166464c998c098 100644
--- a/apps/gpu/LBM/ActuatorLine/ActuatorLine.cpp
+++ b/apps/gpu/LBM/ActuatorLine/ActuatorLine.cpp
@@ -236,21 +236,28 @@ void multipleLevel(const std::string& configPath)
     para->addActuator( actuator_farm );
 
 
-    // SPtr<PointProbe> pointProbe = std::make_shared<PointProbe>("pointProbe", para->getOutputPath(), 100, 1, 500, 100);
-    // std::vector<real> probeCoordsX = {reference_diameter,2*reference_diameter,5*reference_diameter};
-    // std::vector<real> probeCoordsY = {3*reference_diameter,3*reference_diameter,3*reference_diameter};
-    // std::vector<real> probeCoordsZ = {3*reference_diameter,3*reference_diameter,3*reference_diameter};
-    // pointProbe->addProbePointsFromList(probeCoordsX, probeCoordsY, probeCoordsZ);
-    // // pointProbe->addProbePointsFromXNormalPlane(2*D, 0.0, 0.0, L_y, L_z, (uint)L_y/dx, (uint)L_z/dx);
-
-    // pointProbe->addStatistic(Statistic::Means);
-    // pointProbe->addStatistic(Statistic::Variances);
-    // para->addProbe( pointProbe );
-
-    // SPtr<PlaneProbe> planeProbe = std::make_shared<PlaneProbe>("planeProbe", para->getOutputPath(), 100, 500, 100, 100);
-    // planeProbe->setProbePlane(5*reference_diameter, 0, 0, dx, L_y, L_z);
-    // planeProbe->addStatistic(Statistic::Means);
-    // para->addProbe( planeProbe );
+    SPtr<PointProbe> pointProbe = std::make_shared<PointProbe>("pointProbe", para->getOutputPath(), 100, 1, 500, 100, false);
+    std::vector<real> probeCoordsX = {reference_diameter,2*reference_diameter,5*reference_diameter};
+    std::vector<real> probeCoordsY = {3*reference_diameter,3*reference_diameter,3*reference_diameter};
+    std::vector<real> probeCoordsZ = {3*reference_diameter,3*reference_diameter,3*reference_diameter};
+
+    pointProbe->addProbePointsFromList(probeCoordsX, probeCoordsY, probeCoordsZ);
+    // pointProbe->addProbePointsFromXNormalPlane(2*D, 0.0, 0.0, L_y, L_z, (uint)L_y/dx, (uint)L_z/dx);
+
+    pointProbe->addStatistic(Statistic::Means);
+    pointProbe->addStatistic(Statistic::Variances);
+    para->addProbe( pointProbe );
+
+    SPtr<PointProbe> timeseriesProbe = std::make_shared<PointProbe>("timeProbe", para->getOutputPath(), 100, 1, 500, 100, true);
+    timeseriesProbe->addProbePointsFromList(probeCoordsX, probeCoordsY, probeCoordsZ);
+    timeseriesProbe->addStatistic(Statistic::Instantaneous);
+    timeseriesProbe->addStatistic(Statistic::Means);
+    para->addProbe( timeseriesProbe );
+
+    SPtr<PlaneProbe> planeProbe = std::make_shared<PlaneProbe>("planeProbe", para->getOutputPath(), 100, 500, 100, 100);
+    planeProbe->setProbePlane(5*reference_diameter, 0, 0, dx, L_y, L_z);
+    planeProbe->addStatistic(Statistic::Means);
+    para->addProbe( planeProbe );
 
 
     auto cudaMemoryManager = std::make_shared<CudaMemoryManager>(para);
diff --git a/apps/gpu/LBM/BoundaryLayer/configBoundaryLayer.txt b/apps/gpu/LBM/BoundaryLayer/configBoundaryLayer.txt
index 83e7861a5fb85ea800d187699f1c6c1409422f0a..1b415aa6ce14109c2d93a17ae78463786a7b7608 100644
--- a/apps/gpu/LBM/BoundaryLayer/configBoundaryLayer.txt
+++ b/apps/gpu/LBM/BoundaryLayer/configBoundaryLayer.txt
@@ -23,8 +23,7 @@ Ma = 0.1
 nz = 96 
 
 bodyForce = true
-UseAMD = true
-SGSconstant = 0.2
+TurbulenceModel = QR
 QuadricLimiterP = 100000.0
 QuadricLimiterM = 100000.0
 QuadricLimiterD = 100000.0
diff --git a/apps/gpu/tests/NumericalTests/Utilities/DataWriter/AnalyticalResults2DToVTKWriter/AnalyticalResults2DToVTKWriterImp.cpp b/apps/gpu/tests/NumericalTests/Utilities/DataWriter/AnalyticalResults2DToVTKWriter/AnalyticalResults2DToVTKWriterImp.cpp
index 126ff07a31c4f434cc82b4bb8b7e1d944b22cae1..15032a8fd4ec6ce8f7684c33ceae0085d92f54c4 100644
--- a/apps/gpu/tests/NumericalTests/Utilities/DataWriter/AnalyticalResults2DToVTKWriter/AnalyticalResults2DToVTKWriterImp.cpp
+++ b/apps/gpu/tests/NumericalTests/Utilities/DataWriter/AnalyticalResults2DToVTKWriter/AnalyticalResults2DToVTKWriterImp.cpp
@@ -40,7 +40,7 @@ void AnalyticalResults2DToVTKWriterImp::writeAnalyticalResult(std::shared_ptr<Pa
         for (int level = para->getCoarse(); level <= para->getFine(); level++) {
 #pragma omp parallel for
             for (int timeStep = 0; timeStep < analyticalResult->getNumberOfTimeSteps(); timeStep++) {
-                const unsigned int numberOfParts = para->getParH(level)->size_Mat / para->getlimitOfNodesForVTK() + 1;
+                const unsigned int numberOfParts = para->getParH(level)->size_Mat / para->getLimitOfNodesForVTK() + 1;
                 std::vector<std::string> fname;
                 unsigned int time =
                     analyticalResult->getTimeSteps().at(timeStep) * analyticalResult->getTimeStepLength();
@@ -93,13 +93,13 @@ void AnalyticalResults2DToVTKWriterImp::writeTimeStep(std::shared_ptr<Parameter>
     std::vector<double> vz = analyticalResult->getVz()[timeStep];
 
     for (unsigned int part = 0; part < fname.size(); part++) {
-        if (((part + 1) * para->getlimitOfNodesForVTK()) > para->getParH(level)->size_Mat)
-            sizeOfNodes = para->getParH(level)->size_Mat - (part * para->getlimitOfNodesForVTK());
+        if (((part + 1) * para->getLimitOfNodesForVTK()) > para->getParH(level)->size_Mat)
+            sizeOfNodes = para->getParH(level)->size_Mat - (part * para->getLimitOfNodesForVTK());
         else
-            sizeOfNodes = para->getlimitOfNodesForVTK();
+            sizeOfNodes = para->getLimitOfNodesForVTK();
 
         //////////////////////////////////////////////////////////////////////////
-        startpos = part * para->getlimitOfNodesForVTK();
+        startpos = part * para->getLimitOfNodesForVTK();
         endpos = startpos + sizeOfNodes;
         //////////////////////////////////////////////////////////////////////////
         cells.clear();
diff --git a/pythonbindings/CMakeLists.txt b/pythonbindings/CMakeLists.txt
index 037b68baf53d5da8a1ccd20155cb0e7be483176b..7d09a87b8ad3a24334c60bf318523c0bc6702755 100644
--- a/pythonbindings/CMakeLists.txt
+++ b/pythonbindings/CMakeLists.txt
@@ -7,7 +7,6 @@ endif()
 project(VirtualFluidsPython LANGUAGES ${PYFLUIDS_LANGUAGES})
 
 pybind11_add_module(python_bindings MODULE src/VirtualFluids.cpp)
-target_compile_definitions(python_bindings PUBLIC VF_DOUBLE_ACCURACY)
 
 set_target_properties(  python_bindings PROPERTIES
                         LIBRARY_OUTPUT_DIRECTORY ${CMAKE_SOURCE_DIR}/pythonbindings/pyfluids
@@ -25,6 +24,7 @@ IF(BUILD_VF_GPU)
 ENDIF()
 
 IF(BUILD_VF_CPU)
+    target_compile_definitions(python_bindings PUBLIC VF_DOUBLE_ACCURACY)
     target_compile_definitions(python_bindings PRIVATE VF_METIS VF_MPI VF_CPU_PYTHONBINDINGS)
     target_link_libraries(python_bindings PRIVATE simulationconfig VirtualFluidsCore muparser)
 
diff --git a/pythonbindings/pyfluids-stubs/__init__.pyi b/pythonbindings/pyfluids-stubs/__init__.pyi
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..4dc4ea07b2c47336f8e5b2dd6c5aa6af8dab4427 100644
--- a/pythonbindings/pyfluids-stubs/__init__.pyi
+++ b/pythonbindings/pyfluids-stubs/__init__.pyi
@@ -0,0 +1 @@
+from . import bindings as bindings
diff --git a/pythonbindings/pyfluids-stubs/bindings/__init__.pyi b/pythonbindings/pyfluids-stubs/bindings/__init__.pyi
index 4e7f353eab97cc536f8f18e72319af1cd7a1916a..337d26e6b77338facc26fa5abf65d8f15f413169 100644
--- a/pythonbindings/pyfluids-stubs/bindings/__init__.pyi
+++ b/pythonbindings/pyfluids-stubs/bindings/__init__.pyi
@@ -32,6 +32,9 @@ r"""
 ! \author Henry Korb
 =======================================================================================
 """
+from . import basics as basics, gpu as gpu, lbm as lbm, logger as logger
+
+
 class ostream_redirect:
     def __init__(self, stdout: bool = ..., stderr: bool = ...) -> None: ...
     def __enter__(self) -> None: ...
diff --git a/pythonbindings/pyfluids-stubs/bindings/basics/__init__.pyi b/pythonbindings/pyfluids-stubs/bindings/basics/__init__.pyi
index a646f7e590e2aba91ab1c367f75b8c6ebe8f79ae..ed74e648a0b6739b31853c9633d63eeac3b91df4 100644
--- a/pythonbindings/pyfluids-stubs/bindings/basics/__init__.pyi
+++ b/pythonbindings/pyfluids-stubs/bindings/basics/__init__.pyi
@@ -32,10 +32,13 @@ r"""
 ! \author Henry Korb
 =======================================================================================
 """
-from typing import ClassVar
+from __future__ import annotations
 
-from typing import overload
+from typing import ClassVar, overload
 
+from . import logger as logger
+
+from pyfluids.bindings.basics import logger as logger
 class ConfigurationFile:
     def __init__(self) -> None: ...
     def contains(self, key: str) -> bool: ...
diff --git a/pythonbindings/pyfluids-stubs/bindings/gpu/__init__.pyi b/pythonbindings/pyfluids-stubs/bindings/gpu/__init__.pyi
index 64a598ee1974b089393b328566def02fb3600005..d31c6b464de55cedd1e241c4d2b5175ab9752e8c 100644
--- a/pythonbindings/pyfluids-stubs/bindings/gpu/__init__.pyi
+++ b/pythonbindings/pyfluids-stubs/bindings/gpu/__init__.pyi
@@ -32,77 +32,88 @@ r"""
 ! \author Henry Korb
 =======================================================================================
 """
-from typing import Any, Callable, ClassVar, List, Optional
+from __future__ import annotations
+from typing import Callable, ClassVar, List, Optional
 
-from typing import overload
-import numpy
+from typing import overload, Union
+import numpy as np
+import numpy.typing as npt
 import pyfluids.bindings.basics
-import pyfluids.bindings.gpu.grid_generator as grid_generator
+
+from  pyfluids.bindings.gpu import grid_generator as grid_generator
+from pyfluids.bindings.gpu import probes as probes
+
+class PreCollisionInteractor:
+    def __init__(self, *args, **kwargs) -> None: ...
+
+
+class FileCollection:
+    def __init__(self, *args, **kwargs) -> None: ...
 
 class ActuatorFarm(PreCollisionInteractor):
     def __init__(self, number_of_blades_per_turbine: int, density: float, number_of_nodes_per_blade: int, epsilon: float, level: int, delta_t: float, delta_x: float, use_host_arrays: bool) -> None: ...
     def add_turbine(self, posX: float, posY: float, posZ: float, diameter: float, omega: float, azimuth: float, yaw: float, bladeRadii: List[float]) -> None: ...
     def calc_blade_forces(self) -> None: ...
-    def get_all_azimuths(self) -> numpy.ndarray[numpy.float32]: ...
-    def get_all_blade_coords_x(self) -> numpy.ndarray[numpy.float32]: ...
+    def get_all_azimuths(self) -> npt.NDArray[np.float32]: ...
+    def get_all_blade_coords_x(self) -> npt.NDArray[np.float32]: ...
     def get_all_blade_coords_x_device(self) -> int: ...
-    def get_all_blade_coords_y(self) -> numpy.ndarray[numpy.float32]: ...
+    def get_all_blade_coords_y(self) -> npt.NDArray[np.float32]: ...
     def get_all_blade_coords_y_device(self) -> int: ...
-    def get_all_blade_coords_z(self) -> numpy.ndarray[numpy.float32]: ...
+    def get_all_blade_coords_z(self) -> npt.NDArray[np.float32]: ...
     def get_all_blade_coords_z_device(self) -> int: ...
-    def get_all_blade_forces_x(self) -> numpy.ndarray[numpy.float32]: ...
+    def get_all_blade_forces_x(self) -> npt.NDArray[np.float32]: ...
     def get_all_blade_forces_x_device(self) -> int: ...
-    def get_all_blade_forces_y(self) -> numpy.ndarray[numpy.float32]: ...
+    def get_all_blade_forces_y(self) -> npt.NDArray[np.float32]: ...
     def get_all_blade_forces_y_device(self) -> int: ...
-    def get_all_blade_forces_z(self) -> numpy.ndarray[numpy.float32]: ...
+    def get_all_blade_forces_z(self) -> npt.NDArray[np.float32]: ...
     def get_all_blade_forces_z_device(self) -> int: ...
-    def get_all_blade_radii(self) -> numpy.ndarray[numpy.float32]: ...
+    def get_all_blade_radii(self) -> npt.NDArray[np.float32]: ...
     def get_all_blade_radii_device(self) -> int: ...
-    def get_all_blade_velocities_x(self) -> numpy.ndarray[numpy.float32]: ...
+    def get_all_blade_velocities_x(self) -> npt.NDArray[np.float32]: ...
     def get_all_blade_velocities_x_device(self) -> int: ...
-    def get_all_blade_velocities_y(self) -> numpy.ndarray[numpy.float32]: ...
+    def get_all_blade_velocities_y(self) -> npt.NDArray[np.float32]: ...
     def get_all_blade_velocities_y_device(self) -> int: ...
-    def get_all_blade_velocities_z(self) -> numpy.ndarray[numpy.float32]: ...
+    def get_all_blade_velocities_z(self) -> npt.NDArray[np.float32]: ...
     def get_all_blade_velocities_z_device(self) -> int: ...
-    def get_all_omegas(self) -> numpy.ndarray[numpy.float32]: ...
-    def get_all_turbine_pos_x(self) -> numpy.ndarray[numpy.float32]: ...
-    def get_all_turbine_pos_y(self) -> numpy.ndarray[numpy.float32]: ...
-    def get_all_turbine_pos_z(self) -> numpy.ndarray[numpy.float32]: ...
-    def get_all_yaws(self) -> numpy.ndarray[numpy.float32]: ...
+    def get_all_omegas(self) -> npt.NDArray[np.float32]: ...
+    def get_all_turbine_pos_x(self) -> npt.NDArray[np.float32]: ...
+    def get_all_turbine_pos_y(self) -> npt.NDArray[np.float32]: ...
+    def get_all_turbine_pos_z(self) -> npt.NDArray[np.float32]: ...
+    def get_all_yaws(self) -> npt.NDArray[np.float32]: ...
     def get_turbine_azimuth(self, turbine: int) -> float: ...
-    def get_turbine_blade_coords_x(self, turbine: int) -> numpy.ndarray[numpy.float32]: ...
+    def get_turbine_blade_coords_x(self, turbine: int) -> npt.NDArray[np.float32]: ...
     def get_turbine_blade_coords_x_device(self, turbine: int) -> int: ...
-    def get_turbine_blade_coords_y(self, turbine: int) -> numpy.ndarray[numpy.float32]: ...
+    def get_turbine_blade_coords_y(self, turbine: int) -> npt.NDArray[np.float32]: ...
     def get_turbine_blade_coords_y_device(self, turbine: int) -> int: ...
-    def get_turbine_blade_coords_z(self, turbine: int) -> numpy.ndarray[numpy.float32]: ...
+    def get_turbine_blade_coords_z(self, turbine: int) -> npt.NDArray[np.float32]: ...
     def get_turbine_blade_coords_z_device(self, turbine: int) -> int: ...
-    def get_turbine_blade_forces_x(self, turbine: int) -> numpy.ndarray[numpy.float32]: ...
+    def get_turbine_blade_forces_x(self, turbine: int) -> npt.NDArray[np.float32]: ...
     def get_turbine_blade_forces_x_device(self, turbine: int) -> int: ...
-    def get_turbine_blade_forces_y(self, turbine: int) -> numpy.ndarray[numpy.float32]: ...
+    def get_turbine_blade_forces_y(self, turbine: int) -> npt.NDArray[np.float32]: ...
     def get_turbine_blade_forces_y_device(self, turbine: int) -> int: ...
-    def get_turbine_blade_forces_z(self, turbine: int) -> numpy.ndarray[numpy.float32]: ...
+    def get_turbine_blade_forces_z(self, turbine: int) -> npt.NDArray[np.float32]: ...
     def get_turbine_blade_forces_z_device(self, turbine: int) -> int: ...
-    def get_turbine_blade_radii(self, turbine: int) -> numpy.ndarray[numpy.float32]: ...
+    def get_turbine_blade_radii(self, turbine: int) -> npt.NDArray[np.float32]: ...
     def get_turbine_blade_radii_device(self, turbine: int) -> int: ...
-    def get_turbine_blade_velocities_x(self, turbine: int) -> numpy.ndarray[numpy.float32]: ...
+    def get_turbine_blade_velocities_x(self, turbine: int) -> npt.NDArray[np.float32]: ...
     def get_turbine_blade_velocities_x_device(self, turbine: int) -> int: ...
-    def get_turbine_blade_velocities_y(self, turbine: int) -> numpy.ndarray[numpy.float32]: ...
+    def get_turbine_blade_velocities_y(self, turbine: int) -> npt.NDArray[np.float32]: ...
     def get_turbine_blade_velocities_y_device(self, turbine: int) -> int: ...
-    def get_turbine_blade_velocities_z(self, turbine: int) -> numpy.ndarray[numpy.float32]: ...
+    def get_turbine_blade_velocities_z(self, turbine: int) -> npt.NDArray[np.float32]: ...
     def get_turbine_blade_velocities_z_device(self, turbine: int) -> int: ...
     def get_turbine_omega(self, turbine: int) -> float: ...
-    def get_turbine_pos(self, turbine: int) -> numpy.ndarray[numpy.float32]: ...
+    def get_turbine_pos(self, turbine: int) -> npt.NDArray[np.float32]: ...
     def get_turbine_yaw(self, turbine: int) -> float: ...
-    def set_all_azimuths(self, azimuths: numpy.ndarray[numpy.float32]) -> None: ...
-    def set_all_blade_coords(self, blade_coords_x: numpy.ndarray[numpy.float32], blade_coords_y: numpy.ndarray[numpy.float32], blade_coords_z: numpy.ndarray[numpy.float32]) -> None: ...
-    def set_all_blade_forces(self, blade_forces_x: numpy.ndarray[numpy.float32], blade_forces_y: numpy.ndarray[numpy.float32], blade_forces_z: numpy.ndarray[numpy.float32]) -> None: ...
-    def set_all_blade_velocities(self, blade_velocities_x: numpy.ndarray[numpy.float32], blade_velocities_y: numpy.ndarray[numpy.float32], blade_velocities_z: numpy.ndarray[numpy.float32]) -> None: ...
-    def set_all_omegas(self, omegas: numpy.ndarray[numpy.float32]) -> None: ...
-    def set_all_yaws(self, yaws: numpy.ndarray[numpy.float32]) -> None: ...
+    def set_all_azimuths(self, azimuths: npt.NDArray[np.float32]) -> None: ...
+    def set_all_blade_coords(self, blade_coords_x: npt.NDArray[np.float32], blade_coords_y: npt.NDArray[np.float32], blade_coords_z: npt.NDArray[np.float32]) -> None: ...
+    def set_all_blade_forces(self, blade_forces_x: npt.NDArray[np.float32], blade_forces_y: npt.NDArray[np.float32], blade_forces_z: npt.NDArray[np.float32]) -> None: ...
+    def set_all_blade_velocities(self, blade_velocities_x: npt.NDArray[np.float32], blade_velocities_y: npt.NDArray[np.float32], blade_velocities_z: npt.NDArray[np.float32]) -> None: ...
+    def set_all_omegas(self, omegas: npt.NDArray[np.float32]) -> None: ...
+    def set_all_yaws(self, yaws: npt.NDArray[np.float32]) -> None: ...
     def set_turbine_azimuth(self, turbine: int, azimuth: float) -> None: ...
-    def set_turbine_blade_coords(self, turbine: int, blade_coords_x: numpy.ndarray[numpy.float32], blade_coords_y: numpy.ndarray[numpy.float32], blade_coords_z: numpy.ndarray[numpy.float32]) -> None: ...
-    def set_turbine_blade_forces(self, turbine: int, blade_forces_x: numpy.ndarray[numpy.float32], blade_forces_y: numpy.ndarray[numpy.float32], blade_forces_z: numpy.ndarray[numpy.float32]) -> None: ...
-    def set_turbine_blade_velocities(self, turbine: int, blade_velocities_x: numpy.ndarray[numpy.float32], blade_velocities_y: numpy.ndarray[numpy.float32], blade_velocities_z: numpy.ndarray[numpy.float32]) -> None: ...
+    def set_turbine_blade_coords(self, turbine: int, blade_coords_x: npt.NDArray[np.float32], blade_coords_y: npt.NDArray[np.float32], blade_coords_z: npt.NDArray[np.float32]) -> None: ...
+    def set_turbine_blade_forces(self, turbine: int, blade_forces_x: npt.NDArray[np.float32], blade_forces_y: npt.NDArray[np.float32], blade_forces_z: npt.NDArray[np.float32]) -> None: ...
+    def set_turbine_blade_velocities(self, turbine: int, blade_velocities_x: npt.NDArray[np.float32], blade_velocities_y: npt.NDArray[np.float32], blade_velocities_z: npt.NDArray[np.float32]) -> None: ...
     def set_turbine_omega(self, turbine: int, omega: float) -> None: ...
     def set_turbine_yaw(self, turbine: int, yaw: float) -> None: ...
     @property
@@ -116,21 +127,23 @@ class ActuatorFarm(PreCollisionInteractor):
     @property
     def number_of_indices(self) -> int: ...
     @property
-    def number_of_nodes(self) -> int: ...
+    def number_of_grid_nodes(self) -> int: ...
     @property
     def number_of_nodes_per_blade(self) -> int: ...
     @property
     def number_of_turbines(self) -> int: ...
 
+
 class BoundaryConditionFactory:
     def __init__(self) -> None: ...
-    def set_geometry_boundary_condition(self, boundary_condition_type) -> None: ...
-    def set_no_slip_boundary_condition(self, boundary_condition_type) -> None: ...
-    def set_precursor_boundary_condition(self, boundary_condition_type) -> None: ...
-    def set_pressure_boundary_condition(self, boundary_condition_type) -> None: ...
-    def set_slip_boundary_condition(self, boundary_condition_type) -> None: ...
-    def set_stress_boundary_condition(self, boundary_condition_type) -> None: ...
-    def set_velocity_boundary_condition(self, boundary_condition_type) -> None: ...
+    def set_geometry_boundary_condition(self, boundary_condition_type: Union[SlipBC, VelocityBC, NoSlipBC]) -> None: ...
+    def set_no_slip_boundary_condition(self, boundary_condition_type: NoSlipBC) -> None: ...
+    def set_precursor_boundary_condition(self, boundary_condition_type: PrecursorBC) -> None: ...
+    def set_pressure_boundary_condition(self, boundary_condition_type: PressureBC) -> None: ...
+    def set_slip_boundary_condition(self, boundary_condition_type: SlipBC) -> None: ...
+    def set_stress_boundary_condition(self, boundary_condition_type: StressBC) -> None: ...
+    def set_velocity_boundary_condition(self, boundary_condition_type: VelocityBC) -> None: ...
+
 
 class MpiCommunicator:
     def __init__(self, *args, **kwargs) -> None: ...
@@ -139,9 +152,11 @@ class MpiCommunicator:
     def get_number_of_process(self) -> int: ...
     def get_pid(self) -> int: ...
 
+
 class CudaMemoryManager:
     def __init__(self, parameter: Parameter) -> None: ...
 
+
 class FileType:
     __members__: ClassVar[dict] = ...  # read-only
     VTK: ClassVar[FileType] = ...
@@ -157,11 +172,13 @@ class FileType:
     @property
     def name(self) -> str: ...
 
+
 class GridProvider:
     def __init__(self, *args, **kwargs) -> None: ...
     @staticmethod
     def make_grid_generator(builder: grid_generator.GridBuilder, para: Parameter, cuda_memory_manager: CudaMemoryManager, communicator: Communicator) -> GridProvider: ...
 
+
 class GridScaling:
     __members__: ClassVar[dict] = ...  # read-only
     NotSpecified: ClassVar[GridScaling] = ...
@@ -179,10 +196,12 @@ class GridScaling:
     @property
     def name(self) -> str: ...
 
+
 class GridScalingFactory:
     def __init__(self) -> None: ...
     def set_scaling_factory(self, scaling_type) -> None: ...
 
+
 class NoSlipBC:
     __members__: ClassVar[dict] = ...  # read-only
     NoSlip3rdMomentsCompressible: ClassVar[NoSlipBC] = ...
@@ -202,6 +221,7 @@ class NoSlipBC:
     @property
     def name(self) -> str: ...
 
+
 class OutputVariable:
     __members__: ClassVar[dict] = ...  # read-only
     Distributions: ClassVar[OutputVariable] = ...
@@ -218,6 +238,7 @@ class OutputVariable:
     @property
     def name(self) -> str: ...
 
+
 class Parameter:
     @overload
     def __init__(self, number_of_processes: int, my_ID: int, config_data: Optional[pyfluids.bindings.basics.ConfigurationFile]) -> None: ...
@@ -245,7 +266,7 @@ class Parameter:
     def set_diff_on(self, is_diff: bool) -> None: ...
     def set_forcing(self, forcing_x: float, forcing_y: float, forcing_z: float) -> None: ...
     def set_has_wall_model_monitor(self, has_wall_monitor: bool) -> None: ...
-    def set_initial_condition(self, init_func: Callable[[float,float,float],List[float]]) -> None: ...
+    def set_initial_condition(self, init_func: Callable[[float, float, float], List[float]]) -> None: ...
     def set_initial_condition_log_law(self, u_star: float, z0: float, velocity_ratio: float) -> None: ...
     def set_initial_condition_perturbed_log_law(self, u_star: float, z0: float, length_x: float, length_z: float, height: float, velocity_ratio: float) -> None: ...
     def set_initial_condition_uniform(self, velocity_x: float, velocity_y: float, velocity_z: float) -> None: ...
@@ -270,8 +291,6 @@ class Parameter:
     def set_viscosity_LB(self, viscosity: float) -> None: ...
     def set_viscosity_ratio(self, viscosity_ratio: float) -> None: ...
 
-class PreCollisionInteractor:
-    def __init__(self, *args, **kwargs) -> None: ...
 
 class PrecursorBC:
     __members__: ClassVar[dict] = ...  # read-only
@@ -290,9 +309,11 @@ class PrecursorBC:
     @property
     def name(self) -> str: ...
 
+
 class PrecursorWriter(PreCollisionInteractor):
     def __init__(self, filename: str, output_path: str, x_pos: float, y_min: float, y_max: float, z_min: float, z_max: float, t_start_out: int, t_save: int, output_variable: OutputVariable, max_timesteps_per_file: int) -> None: ...
 
+
 class PressureBC:
     __members__: ClassVar[dict] = ...  # read-only
     NotSpecified: ClassVar[PressureBC] = ...
@@ -314,6 +335,7 @@ class PressureBC:
     @property
     def name(self) -> str: ...
 
+
 class SideType:
     __members__: ClassVar[dict] = ...  # read-only
     GEOMETRY: ClassVar[SideType] = ...
@@ -335,6 +357,7 @@ class SideType:
     @property
     def name(self) -> str: ...
 
+
 class Simulation:
     @overload
     def __init__(self, parameter: Parameter, memoryManager: CudaMemoryManager, communicator, gridProvider: GridProvider, bcFactory: BoundaryConditionFactory, gridScalingFactory: GridScalingFactory) -> None: ...
@@ -346,6 +369,7 @@ class Simulation:
     def addKineticEnergyAnalyzer(self, t_analyse: int) -> None: ...
     def run(self) -> None: ...
 
+
 class SlipBC:
     __members__: ClassVar[dict] = ...  # read-only
     NotSpecified: ClassVar[SlipBC] = ...
@@ -366,6 +390,7 @@ class SlipBC:
     @property
     def name(self) -> str: ...
 
+
 class StressBC:
     __members__: ClassVar[dict] = ...  # read-only
     NotSpecified: ClassVar[StressBC] = ...
@@ -384,6 +409,7 @@ class StressBC:
     @property
     def name(self) -> str: ...
 
+
 class TurbulenceModel:
     __members__: ClassVar[dict] = ...  # read-only
     AMD: ClassVar[TurbulenceModel] = ...
@@ -402,15 +428,18 @@ class TurbulenceModel:
     @property
     def name(self) -> str: ...
 
+
 class TurbulenceModelFactory:
     def __init__(self, para: Parameter) -> None: ...
     def read_config_file(self, config_data: pyfluids.bindings.basics.ConfigurationFile) -> None: ...
     def set_model_constant(self, model_constant: float) -> None: ...
     def set_turbulence_model(self, turbulence_model: TurbulenceModel) -> None: ...
 
+
 class VTKFileCollection(FileCollection):
     def __init__(self, prefix: str) -> None: ...
 
+
 class VelocityBC:
     __members__: ClassVar[dict] = ...  # read-only
     NotSpecified: ClassVar[VelocityBC] = ...
@@ -430,7 +459,5 @@ class VelocityBC:
     @property
     def name(self) -> str: ...
 
-class FileCollection:
-    def __init__(self, *args, **kwargs) -> None: ...
 
 def create_file_collection(prefix: str, type: FileType) -> FileCollection: ...
diff --git a/pythonbindings/pyfluids-stubs/bindings/gpu/grid_generator.pyi b/pythonbindings/pyfluids-stubs/bindings/gpu/grid_generator.pyi
index 433a20e7efe472bd791b1d2a0f43859676e8fcf0..69478390236c56c9e5880348daf676f6d626033b 100644
--- a/pythonbindings/pyfluids-stubs/bindings/gpu/grid_generator.pyi
+++ b/pythonbindings/pyfluids-stubs/bindings/gpu/grid_generator.pyi
@@ -32,15 +32,23 @@ r"""
 ! \author Henry Korb
 =======================================================================================
 """
-from typing import Any, List
+from __future__ import annotations
+
+from typing import List
 
 from typing import overload
 import pyfluids.bindings.basics
 import pyfluids.bindings.gpu
 
+
 class BoundingBox:
     def __init__(self, min_x: float, max_x: float, min_y: float, max_y: float, min_z: float, max_z: float) -> None: ...
 
+
+class Object:
+    def __init__(self, *args, **kwargs) -> None: ...
+
+
 class Conglomerate(Object):
     def __init__(self, *args, **kwargs) -> None: ...
     def add(self, object: Object) -> None: ...
@@ -48,28 +56,33 @@ class Conglomerate(Object):
     def make_shared() -> Conglomerate: ...
     def subtract(self, object: Object) -> None: ...
 
+
 class Cuboid(Object):
     def __init__(self, min_x1: float, min_x2: float, min_x3: float, max_x1: float, max_x2: float, max_x3: float) -> None: ...
 
+
 class GridBuilder:
     def __init__(self, *args, **kwargs) -> None: ...
     def get_number_of_grid_levels(self) -> int: ...
 
+
 class GridFactory:
     def __init__(self, *args, **kwargs) -> None: ...
     @staticmethod
     def make() -> GridFactory: ...
 
+
 class LevelGridBuilder(GridBuilder):
     def __init__(self, *args, **kwargs) -> None: ...
     def set_no_slip_boundary_condition(self, side_type: pyfluids.bindings.gpu.SideType) -> None: ...
     def set_periodic_boundary_condition(self, periodic_x: bool, periodic_y: bool, periodic_z: bool) -> None: ...
-    def set_precursor_boundary_condition(self, side_type: pyfluids.bindings.gpu.SideType, file_collection: pyfluids.bindings.gpu.VelocityFileCollection, n_t_read: int, velocity_x: float = ..., velocity_y: float = ..., velocity_z: float = ..., file_level_to_grid_level_map: List[int] = ...) -> None: ...
+    def set_precursor_boundary_condition(self, side_type: pyfluids.bindings.gpu.SideType, file_collection: pyfluids.bindings.gpu.FileCollection, n_t_read: int, velocity_x: float = ..., velocity_y: float = ..., velocity_z: float = ..., file_level_to_grid_level_map: List[int] = ...) -> None: ...
     def set_pressure_boundary_condition(self, side_type: pyfluids.bindings.gpu.SideType, rho: float) -> None: ...
     def set_slip_boundary_condition(self, side_type: pyfluids.bindings.gpu.SideType, normal_x: float, normal_y: float, normal_z: float) -> None: ...
-    def set_stress_boundary_condition(self, side_type: pyfluids.bindings.gpu.SideType, normal_x: float, normal_y: float, normal_z: float, sampling_offset: int, z0: float, dx: float, q: float) -> None: ...
+    def set_stress_boundary_condition(self, side_type: pyfluids.bindings.gpu.SideType, normal_x: float, normal_y: float, normal_z: float, sampling_offset: int, z0: float, dx: float, q: float = ...) -> None: ...
     def set_velocity_boundary_condition(self, side_type: pyfluids.bindings.gpu.SideType, vx: float, vy: float, vz: float) -> None: ...
 
+
 class MultipleGridBuilder(LevelGridBuilder):
     def __init__(self, *args, **kwargs) -> None: ...
     def add_coarse_grid(self, start_x: float, start_y: float, start_z: float, end_x: float, end_y: float, end_z: float, delta: float) -> None: ...
@@ -86,14 +99,13 @@ class MultipleGridBuilder(LevelGridBuilder):
     @staticmethod
     def make_shared(grid_factory: GridFactory) -> MultipleGridBuilder: ...
 
-class Object:
-    def __init__(self, *args, **kwargs) -> None: ...
 
 class Sphere(Object):
     def __init__(self, *args, **kwargs) -> None: ...
     @staticmethod
     def make_shared() -> Sphere: ...
 
+
 class TriangularMesh(Object):
     def __init__(self, *args, **kwargs) -> None: ...
     @staticmethod
diff --git a/pythonbindings/pyfluids-stubs/bindings/gpu/probes.pyi b/pythonbindings/pyfluids-stubs/bindings/gpu/probes.pyi
index af9c40078e6009efebda4450b5c5e23586aa1e83..c99aec5568c0725319557931c15a02988a2b4d23 100644
--- a/pythonbindings/pyfluids-stubs/bindings/gpu/probes.pyi
+++ b/pythonbindings/pyfluids-stubs/bindings/gpu/probes.pyi
@@ -32,27 +32,11 @@ r"""
 ! \author Henry Korb
 =======================================================================================
 """
+from __future__ import annotations
 from typing import ClassVar, List
 
-import pyfluids.bindings.gpu
+from pyfluids.bindings.gpu import PreCollisionInteractor
 
-class PlanarAverageProbe(Probe):
-    def __init__(self, probe_name: str, output_path: str, t_start_avg: int, t_start_tmp_avg: int, t_avg: int, t_start_out: int, t_out: int, plane_normal: str) -> None: ...
-
-class PlaneProbe(Probe):
-    def __init__(self, probe_name: str, output_path: str, t_start_avg: int, t_avg: int, t_start_out: int, t_out: int) -> None: ...
-    def set_probe_plane(self, pos_x: float, pos_y: float, pos_z: float, delta_x: float, delta_y: float, delta_z: float) -> None: ...
-
-class PointProbe(Probe):
-    def __init__(self, probe_name: str, output_path: str, t_start_avg: int, t_avg: int, t_start_out: int, t_out: int, output_timeseries: bool) -> None: ...
-    def add_probe_points_from_list(self, point_coords_x: List[float], point_coords_y: List[float], point_coords_z: List[float]) -> None: ...
-    def add_probe_points_from_x_normal_plane(self, pos_x: float, pos0_y: float, pos0_z: float, pos1_y: float, pos1_z: float, n_y: int, n_z: int) -> None: ...
-
-class Probe(pyfluids.bindings.gpu.PreCollisionInteractor):
-    def __init__(self, *args, **kwargs) -> None: ...
-    def add_all_available_statistics(self) -> None: ...
-    def add_statistic(self, variable: Statistic) -> None: ...
-    def set_file_name_to_n_out(self) -> None: ...
 
 class Statistic:
     __members__: ClassVar[dict] = ...  # read-only
@@ -79,6 +63,30 @@ class Statistic:
     @property
     def name(self) -> str: ...
 
+
+class Probe(PreCollisionInteractor):
+    def __init__(self, *args, **kwargs) -> None: ...
+    def add_all_available_statistics(self) -> None: ...
+    def add_statistic(self, variable: Statistic) -> None: ...
+    def set_file_name_to_n_out(self) -> None: ...
+
+
+class PlanarAverageProbe(Probe):
+    def __init__(self, probe_name: str, output_path: str, t_start_avg: int, t_start_tmp_avg: int, t_avg: int, t_start_out: int, t_out: int, plane_normal: str) -> None: ...
+
+
+class PlaneProbe(Probe):
+    def __init__(self, probe_name: str, output_path: str, t_start_avg: int, t_avg: int, t_start_out: int, t_out: int) -> None: ...
+    def set_probe_plane(self, pos_x: float, pos_y: float, pos_z: float, delta_x: float, delta_y: float, delta_z: float) -> None: ...
+
+
+class PointProbe(Probe):
+    def __init__(self, probe_name: str, output_path: str, t_start_avg: int, t_avg: int, t_start_out: int, t_out: int, output_timeseries: bool) -> None: ...
+    def add_probe_point(self, point_coord_x: float, point_coord_y: float, point_coord_z: float) -> None: ...
+    def add_probe_points_from_list(self, point_coords_x: List[float], point_coords_y: List[float], point_coords_z: List[float]) -> None: ...
+    def add_probe_points_from_x_normal_plane(self, pos_x: float, pos0_y: float, pos0_z: float, pos1_y: float, pos1_z: float, n_y: int, n_z: int) -> None: ...
+
+
 class WallModelProbe(Probe):
     def __init__(self, probe_name: str, output_path: str, t_start_avg: int, t_start_tmp_avg: int, t_avg: int, t_start_out: int, t_out: int) -> None: ...
     def set_evaluate_pressure_gradient(self, eval_press_grad: bool) -> None: ...
diff --git a/pythonbindings/pyfluids-stubs/bindings/logger.pyi b/pythonbindings/pyfluids-stubs/bindings/logger.pyi
index fe84eeb18f3245ef72ed023b2de9db7b9131d144..91c1346c2de2313de2c8df4a0b6d5eb4f4ab87bd 100644
--- a/pythonbindings/pyfluids-stubs/bindings/logger.pyi
+++ b/pythonbindings/pyfluids-stubs/bindings/logger.pyi
@@ -32,12 +32,16 @@ r"""
 ! \author Henry Korb
 =======================================================================================
 """
+from __future__ import annotations
+
+
 class Logger:
     @staticmethod
     def change_log_path(path: str) -> None: ...
     @staticmethod
     def initialize_logger() -> None: ...
 
+
 def vf_log_critical(message: str) -> None: ...
 def vf_log_debug(message: str) -> None: ...
 def vf_log_info(message: str) -> None: ...
diff --git a/pythonbindings/pyfluids/timeseries_probe_reader.py b/pythonbindings/pyfluids/timeseries_probe_reader.py
new file mode 100644
index 0000000000000000000000000000000000000000..d8aa7af05a82b92a1a9cab9ea791de897ac89c13
--- /dev/null
+++ b/pythonbindings/pyfluids/timeseries_probe_reader.py
@@ -0,0 +1,47 @@
+import numpy as np
+from pathlib import Path
+import pandas as pd
+#%%
+
+
+class TimeseriesProbeReader:
+    def __init__(self, file: Path):
+        self.file = file
+        self.quants, self.positions, self.data = \
+            self.read_file()
+
+    def read_file(self):
+        with open(self.file, "rb") as f:
+            header_length = 0
+            header_length += len(f.readline()) # first line
+            quant_line = f.readline()
+            header_length += len(f.readline()) # number of points
+            number_of_points_line = f.readline()
+            header_length += len(f.readline()) # positions
+            n_points = int(number_of_points_line)
+            positions = np.zeros((n_points, 3))
+            for i in range(n_points):
+                pos_line = f.readline()
+                header_length += len(pos_line)
+                positions[i] = [float(pos) for pos in pos_line.split(b", ")]
+
+        header_length += len(quant_line)
+        header_length += len(number_of_points_line)
+
+        quants = quant_line.decode().split(" ")[1:-1]
+        n_quants = len(quants)
+        data = np.fromfile(self.file, dtype=np.float32, offset=header_length)
+        n_timesteps = len(data)//(n_quants*n_points+1)
+        return quants, positions, data.reshape(n_timesteps, n_points*n_quants+1)
+    
+    def get_data(self):
+        return self.data
+    
+    def get_positions(self):
+        return self.positions
+    
+    def get_quantities(self):
+        return self.quants
+    
+    def to_dataframe(self):
+        return pd.DataFrame(self.data[:,1:], columns=self.quants, index=self.data[:,0])
\ No newline at end of file
diff --git a/pythonbindings/src/gpu/submodules/actuator_farm.cpp b/pythonbindings/src/gpu/submodules/actuator_farm.cpp
index a930616db3e0d0713bdf57157387d75d171603de..86e3f5a42152e60d6b62e32fb894406efc8384ea 100644
--- a/pythonbindings/src/gpu/submodules/actuator_farm.cpp
+++ b/pythonbindings/src/gpu/submodules/actuator_farm.cpp
@@ -30,10 +30,13 @@
 //! \ingroup submodules
 //! \author Henry Korb
 //=======================================================================================
+
 #include <pybind11/pybind11.h>
 #include <pybind11/numpy.h>
 #include <gpu/VirtualFluids_GPU/PreCollisionInteractor/ActuatorFarm.h>
 #include <gpu/VirtualFluids_GPU/PreCollisionInteractor/PreCollisionInteractor.h>
+
+
 class PyActuatorFarm : public ActuatorFarm 
 {
 public:
@@ -43,13 +46,20 @@ public:
         PYBIND11_OVERRIDE_NAME(void, ActuatorFarm, "calc_blade_forces", calcBladeForces); 
     }
 };
+
 namespace actuator_farm
 {
     namespace py = pybind11;
 
+    template<class dtype>
+    dtype* np_to_arr(py::array_t<dtype, py::array::c_style> array){ return static_cast<dtype *>(array.request().ptr); };
+
+    template<class dtype>
+    intptr_t arr_to_cp(dtype* array){ return reinterpret_cast<intptr_t>(array); };
+
     void makeModule(py::module_ &parentModule)
     {
-        using arr = py::array_t<float, py::array::c_style>;
+        using arr = py::array_t<real, py::array::c_style>;
         
         py::class_<ActuatorFarm, PreCollisionInteractor, PyActuatorFarm, std::shared_ptr<ActuatorFarm>>(parentModule, "ActuatorFarm", py::dynamic_attr())
         .def(py::init<  const uint,
@@ -71,7 +81,7 @@ namespace actuator_farm
         .def_property_readonly("number_of_turbines", &ActuatorFarm::getNumberOfTurbines)
         .def_property_readonly("number_of_nodes_per_blade", &ActuatorFarm::getNumberOfNodesPerBlade)
         .def_property_readonly("number_of_blades_per_turbine", &ActuatorFarm::getNumberOfBladesPerTurbine)
-        .def_property_readonly("number_of_nodes", &ActuatorFarm::getNumberOfNodes)
+        .def_property_readonly("number_of_grid_nodes", &ActuatorFarm::getNumberOfGridNodes)
         .def_property_readonly("number_of_indices", &ActuatorFarm::getNumberOfIndices)
         .def_property_readonly("density", &ActuatorFarm::getDensity)
         .def_property_readonly("delta_t", &ActuatorFarm::getDeltaT)
@@ -79,7 +89,9 @@ namespace actuator_farm
 
         .def("add_turbine", &ActuatorFarm::addTurbine, py::arg("posX"), py::arg("posY"), py::arg("posZ"), py::arg("diameter"), py::arg("omega"), py::arg("azimuth"), py::arg("yaw"), py::arg("bladeRadii"))
 
-        .def("get_turbine_pos", [](ActuatorFarm& al, uint turbine){ real position[3] = {al.getTurbinePosX(turbine), al.getTurbinePosY(turbine), al.getTurbinePosZ(turbine)}; return arr(3,  position); }, py::arg("turbine"))
+        .def("get_turbine_pos", [](ActuatorFarm& al, uint turbine){ 
+            real position[3] = {al.getTurbinePosX(turbine), al.getTurbinePosY(turbine), al.getTurbinePosZ(turbine)}; return arr(3,  position);
+            }, py::arg("turbine"))
         .def("get_turbine_azimuth", &ActuatorFarm::getTurbineAzimuth, py::arg("turbine"))
         .def("get_turbine_yaw", &ActuatorFarm::getTurbineYaw, py::arg("turbine"))
         .def("get_turbine_omega", &ActuatorFarm::getTurbineOmega, py::arg("turbine"))
@@ -112,59 +124,53 @@ namespace actuator_farm
         .def("get_turbine_blade_forces_y", [](ActuatorFarm& al, uint turbine){ return arr({al.getNumberOfBladesPerTurbine(), al.getNumberOfNodesPerBlade()}, al.getTurbineBladeForcesYDevice(turbine)); }, py::arg("turbine") )
         .def("get_turbine_blade_forces_z", [](ActuatorFarm& al, uint turbine){ return arr({al.getNumberOfBladesPerTurbine(), al.getNumberOfNodesPerBlade()}, al.getTurbineBladeForcesZDevice(turbine)); }, py::arg("turbine") )
 
-        .def("get_all_blade_radii_device", [](ActuatorFarm& al) -> intptr_t { return reinterpret_cast<intptr_t>(al.getAllBladeRadiiDevice()); } )
-        .def("get_all_blade_coords_x_device", [](ActuatorFarm& al) -> intptr_t { return reinterpret_cast<intptr_t> (al.getAllBladeCoordsXDevice()); } )
-        .def("get_all_blade_coords_y_device", [](ActuatorFarm& al) -> intptr_t { return reinterpret_cast<intptr_t> (al.getAllBladeCoordsYDevice()); } )
-        .def("get_all_blade_coords_z_device", [](ActuatorFarm& al) -> intptr_t { return reinterpret_cast<intptr_t> (al.getAllBladeCoordsZDevice()); } )        
-        .def("get_all_blade_velocities_x_device", [](ActuatorFarm& al) -> intptr_t { return reinterpret_cast<intptr_t> (al.getAllBladeVelocitiesXDevice()); } )
-        .def("get_all_blade_velocities_y_device", [](ActuatorFarm& al) -> intptr_t { return reinterpret_cast<intptr_t> (al.getAllBladeVelocitiesYDevice()); } )
-        .def("get_all_blade_velocities_z_device", [](ActuatorFarm& al) -> intptr_t { return reinterpret_cast<intptr_t> (al.getAllBladeVelocitiesZDevice()); } )
-        .def("get_all_blade_forces_x_device", [](ActuatorFarm& al) -> intptr_t { return reinterpret_cast<intptr_t> (al.getAllBladeForcesXDevice()); } )
-        .def("get_all_blade_forces_y_device", [](ActuatorFarm& al) -> intptr_t { return reinterpret_cast<intptr_t> (al.getAllBladeForcesYDevice()); } )
-        .def("get_all_blade_forces_z_device", [](ActuatorFarm& al) -> intptr_t { return reinterpret_cast<intptr_t> (al.getAllBladeForcesZDevice()); } )
+        .def("get_all_blade_radii_device", [](ActuatorFarm& al) -> intptr_t { return arr_to_cp(al.getAllBladeRadiiDevice()); } )
+        .def("get_all_blade_coords_x_device", [](ActuatorFarm& al) -> intptr_t { return arr_to_cp(al.getAllBladeCoordsXDevice()); } )
+        .def("get_all_blade_coords_y_device", [](ActuatorFarm& al) -> intptr_t { return arr_to_cp(al.getAllBladeCoordsYDevice()); } )
+        .def("get_all_blade_coords_z_device", [](ActuatorFarm& al) -> intptr_t { return arr_to_cp(al.getAllBladeCoordsZDevice()); } )        
+        .def("get_all_blade_velocities_x_device", [](ActuatorFarm& al) -> intptr_t { return arr_to_cp(al.getAllBladeVelocitiesXDevice()); } )
+        .def("get_all_blade_velocities_y_device", [](ActuatorFarm& al) -> intptr_t { return arr_to_cp(al.getAllBladeVelocitiesYDevice()); } )
+        .def("get_all_blade_velocities_z_device", [](ActuatorFarm& al) -> intptr_t { return arr_to_cp(al.getAllBladeVelocitiesZDevice()); } )
+        .def("get_all_blade_forces_x_device", [](ActuatorFarm& al) -> intptr_t { return arr_to_cp(al.getAllBladeForcesXDevice()); } )
+        .def("get_all_blade_forces_y_device", [](ActuatorFarm& al) -> intptr_t { return arr_to_cp(al.getAllBladeForcesYDevice()); } )
+        .def("get_all_blade_forces_z_device", [](ActuatorFarm& al) -> intptr_t { return arr_to_cp(al.getAllBladeForcesZDevice()); } )
 
-        .def("get_turbine_blade_radii_device", [](ActuatorFarm& al, uint turbine) -> intptr_t { return reinterpret_cast<intptr_t>(al.getTurbineBladeRadiiDevice(turbine)); }, py::arg("turbine") )
-        .def("get_turbine_blade_coords_x_device", [](ActuatorFarm& al, uint turbine) -> intptr_t { return reinterpret_cast<intptr_t>(al.getTurbineBladeCoordsXDevice(turbine)); }, py::arg("turbine") )
-        .def("get_turbine_blade_coords_y_device", [](ActuatorFarm& al, uint turbine) -> intptr_t { return reinterpret_cast<intptr_t>(al.getTurbineBladeCoordsYDevice(turbine)); }, py::arg("turbine") )
-        .def("get_turbine_blade_coords_z_device", [](ActuatorFarm& al, uint turbine) -> intptr_t { return reinterpret_cast<intptr_t>(al.getTurbineBladeCoordsZDevice(turbine)); }, py::arg("turbine") )        
-        .def("get_turbine_blade_velocities_x_device", [](ActuatorFarm& al, uint turbine) -> intptr_t { return reinterpret_cast<intptr_t>(al.getTurbineBladeVelocitiesXDevice(turbine)); }, py::arg("turbine") )
-        .def("get_turbine_blade_velocities_y_device", [](ActuatorFarm& al, uint turbine) -> intptr_t { return reinterpret_cast<intptr_t>(al.getTurbineBladeVelocitiesYDevice(turbine)); }, py::arg("turbine") )
-        .def("get_turbine_blade_velocities_z_device", [](ActuatorFarm& al, uint turbine) -> intptr_t { return reinterpret_cast<intptr_t>(al.getTurbineBladeVelocitiesZDevice(turbine)); }, py::arg("turbine") )
-        .def("get_turbine_blade_forces_x_device", [](ActuatorFarm& al, uint turbine) -> intptr_t { return reinterpret_cast<intptr_t>(al.getTurbineBladeForcesXDevice(turbine)); }, py::arg("turbine") )
-        .def("get_turbine_blade_forces_y_device", [](ActuatorFarm& al, uint turbine) -> intptr_t { return reinterpret_cast<intptr_t>(al.getTurbineBladeForcesYDevice(turbine)); }, py::arg("turbine") )
-        .def("get_turbine_blade_forces_z_device", [](ActuatorFarm& al, uint turbine) -> intptr_t { return reinterpret_cast<intptr_t>(al.getTurbineBladeForcesZDevice(turbine)); }, py::arg("turbine") )
+        .def("get_turbine_blade_radii_device", [](ActuatorFarm& al, uint turbine) -> intptr_t { return arr_to_cp(al.getTurbineBladeRadiiDevice(turbine)); }, py::arg("turbine") )
+        .def("get_turbine_blade_coords_x_device", [](ActuatorFarm& al, uint turbine) -> intptr_t { return arr_to_cp(al.getTurbineBladeCoordsXDevice(turbine)); }, py::arg("turbine") )
+        .def("get_turbine_blade_coords_y_device", [](ActuatorFarm& al, uint turbine) -> intptr_t { return arr_to_cp(al.getTurbineBladeCoordsYDevice(turbine)); }, py::arg("turbine") )
+        .def("get_turbine_blade_coords_z_device", [](ActuatorFarm& al, uint turbine) -> intptr_t { return arr_to_cp(al.getTurbineBladeCoordsZDevice(turbine)); }, py::arg("turbine") )        
+        .def("get_turbine_blade_velocities_x_device", [](ActuatorFarm& al, uint turbine) -> intptr_t { return arr_to_cp(al.getTurbineBladeVelocitiesXDevice(turbine)); }, py::arg("turbine") )
+        .def("get_turbine_blade_velocities_y_device", [](ActuatorFarm& al, uint turbine) -> intptr_t { return arr_to_cp(al.getTurbineBladeVelocitiesYDevice(turbine)); }, py::arg("turbine") )
+        .def("get_turbine_blade_velocities_z_device", [](ActuatorFarm& al, uint turbine) -> intptr_t { return arr_to_cp(al.getTurbineBladeVelocitiesZDevice(turbine)); }, py::arg("turbine") )
+        .def("get_turbine_blade_forces_x_device", [](ActuatorFarm& al, uint turbine) -> intptr_t { return arr_to_cp(al.getTurbineBladeForcesXDevice(turbine)); }, py::arg("turbine") )
+        .def("get_turbine_blade_forces_y_device", [](ActuatorFarm& al, uint turbine) -> intptr_t { return arr_to_cp(al.getTurbineBladeForcesYDevice(turbine)); }, py::arg("turbine") )
+        .def("get_turbine_blade_forces_z_device", [](ActuatorFarm& al, uint turbine) -> intptr_t { return arr_to_cp(al.getTurbineBladeForcesZDevice(turbine)); }, py::arg("turbine") )
 
-        .def("set_all_azimuths", [](ActuatorFarm& al, arr azimuths){ al.setAllAzimuths(static_cast<float *>(azimuths.request().ptr)); }, py::arg("azimuths"))
-        .def("set_all_yaws", [](ActuatorFarm& al, arr yaws){ al.setAllYaws(static_cast<float *>(yaws.request().ptr)); }, py::arg("yaws"))
-        .def("set_all_omegas", [](ActuatorFarm& al, arr omegas){ al.setAllOmegas(static_cast<float *>(omegas.request().ptr)); }, py::arg("omegas"))
+        .def("set_all_azimuths", [](ActuatorFarm& al, arr azimuths){ al.setAllAzimuths(np_to_arr(azimuths)); }, py::arg("azimuths"))
+        .def("set_all_yaws", [](ActuatorFarm& al, arr yaws){ al.setAllYaws(np_to_arr(yaws)); }, py::arg("yaws"))
+        .def("set_all_omegas", [](ActuatorFarm& al, arr omegas){ al.setAllOmegas(np_to_arr(omegas)); }, py::arg("omegas"))
 
         .def("set_turbine_azimuth", &ActuatorFarm::setTurbineAzimuth, py::arg("turbine"), py::arg("azimuth"))
         .def("set_turbine_yaw", &ActuatorFarm::setTurbineYaw, py::arg("turbine"), py::arg("yaw"))
         .def("set_turbine_omega", &ActuatorFarm::setTurbineOmega, py::arg("turbine"), py::arg("omega"))
 
-        .def("set_all_blade_coords", [](ActuatorFarm& al, arr coordsX, arr coordsY, arr coordsZ)
-        { 
-            al.setAllBladeCoords(static_cast<float *>(coordsX.request().ptr), static_cast<float *>(coordsY.request().ptr), static_cast<float *>(coordsZ.request().ptr)); 
+        .def("set_all_blade_coords", [](ActuatorFarm& al, arr coordsX, arr coordsY, arr coordsZ){ 
+            al.setAllBladeCoords(np_to_arr(coordsX), np_to_arr(coordsY), np_to_arr(coordsZ)); 
         }, py::arg("blade_coords_x"), py::arg("blade_coords_y"), py::arg("blade_coords_z") )
-        .def("set_all_blade_velocities", [](ActuatorFarm& al, arr velocitiesX, arr velocitiesY, arr velocitiesZ)
-        { 
-            al.setAllBladeVelocities(static_cast<float *>(velocitiesX.request().ptr), static_cast<float *>(velocitiesY.request().ptr), static_cast<float *>(velocitiesZ.request().ptr)); 
+        .def("set_all_blade_velocities", [](ActuatorFarm& al, arr velocitiesX, arr velocitiesY, arr velocitiesZ){ 
+            al.setAllBladeVelocities(np_to_arr(velocitiesX), np_to_arr(velocitiesY), np_to_arr(velocitiesZ)); 
         }, py::arg("blade_velocities_x"), py::arg("blade_velocities_y"), py::arg("blade_velocities_z") )
-        .def("set_all_blade_forces", [](ActuatorFarm& al, arr forcesX, arr forcesY, arr forcesZ)
-        { 
-            al.setAllBladeForces(static_cast<float *>(forcesX.request().ptr), static_cast<float *>(forcesY.request().ptr), static_cast<float *>(forcesZ.request().ptr));
+        .def("set_all_blade_forces", [](ActuatorFarm& al, arr forcesX, arr forcesY, arr forcesZ){ 
+            al.setAllBladeForces(np_to_arr(forcesX), np_to_arr(forcesY), np_to_arr(forcesZ));
         }, py::arg("blade_forces_x"), py::arg("blade_forces_y"), py::arg("blade_forces_z") )     
-        .def("set_turbine_blade_coords", [](ActuatorFarm& al, uint turbine, arr coordsX, arr coordsY, arr coordsZ)
-        { 
-            al.setTurbineBladeCoords(turbine, static_cast<float *>(coordsX.request().ptr), static_cast<float *>(coordsY.request().ptr), static_cast<float *>(coordsZ.request().ptr)); 
+        .def("set_turbine_blade_coords", [](ActuatorFarm& al, uint turbine, arr coordsX, arr coordsY, arr coordsZ){ 
+            al.setTurbineBladeCoords(turbine, np_to_arr(coordsX), np_to_arr(coordsY), np_to_arr(coordsZ)); 
         }, py::arg("turbine"), py::arg("blade_coords_x"), py::arg("blade_coords_y"), py::arg("blade_coords_z") )
-        .def("set_turbine_blade_velocities", [](ActuatorFarm& al, uint turbine, arr velocitiesX, arr velocitiesY, arr velocitiesZ)
-        {
-            al.setTurbineBladeVelocities(turbine, static_cast<float *>(velocitiesX.request().ptr), static_cast<float *>(velocitiesY.request().ptr), static_cast<float *>(velocitiesZ.request().ptr)); 
+        .def("set_turbine_blade_velocities", [](ActuatorFarm& al, uint turbine, arr velocitiesX, arr velocitiesY, arr velocitiesZ){
+            al.setTurbineBladeVelocities(turbine, np_to_arr(velocitiesX), np_to_arr(velocitiesY), np_to_arr(velocitiesZ)); 
         }, py::arg("turbine"), py::arg("blade_velocities_x"), py::arg("blade_velocities_y"), py::arg("blade_velocities_z") )
-        .def("set_turbine_blade_forces", [](ActuatorFarm& al, uint turbine, arr forcesX, arr forcesY, arr forcesZ)
-        { 
-            al.setTurbineBladeForces(turbine, static_cast<float *>(forcesX.request().ptr), static_cast<float *>(forcesY.request().ptr), static_cast<float *>(forcesZ.request().ptr)); 
+        .def("set_turbine_blade_forces", [](ActuatorFarm& al, uint turbine, arr forcesX, arr forcesY, arr forcesZ){ 
+            al.setTurbineBladeForces(turbine, np_to_arr(forcesX), np_to_arr(forcesY), np_to_arr(forcesZ)); 
         }, py::arg("turbine"), py::arg("blade_forces_x"), py::arg("blade_forces_y"), py::arg("blade_forces_z") )
         .def("calc_blade_forces", &ActuatorFarm::calcBladeForces);
     }
diff --git a/pythonbindings/src/gpu/submodules/grid_generator.cpp b/pythonbindings/src/gpu/submodules/grid_generator.cpp
index 59d0bd5708b11f246664d1e8c7ee198f986d80c1..6a716478040ef98765423771223d99b59cc6e273 100644
--- a/pythonbindings/src/gpu/submodules/grid_generator.cpp
+++ b/pythonbindings/src/gpu/submodules/grid_generator.cpp
@@ -42,6 +42,10 @@
 #include "gpu/GridGenerator/grid/GridBuilder/GridBuilder.h"
 #include "gpu/GridGenerator/grid/GridBuilder/LevelGridBuilder.h"
 #include "gpu/GridGenerator/grid/GridBuilder/MultipleGridBuilder.h"
+#include "basics/constants/NumericConstants.h"
+
+using namespace vf::basics::constant;
+
 
 namespace grid_generator
 {
@@ -91,7 +95,7 @@ namespace grid_generator
         .def("set_pressure_boundary_condition", &LevelGridBuilder::setPressureBoundaryCondition, py::arg("side_type"), py::arg("rho"))
         .def("set_periodic_boundary_condition", &LevelGridBuilder::setPeriodicBoundaryCondition, py::arg("periodic_x"), py::arg("periodic_y"), py::arg("periodic_z"))
         .def("set_no_slip_boundary_condition", &LevelGridBuilder::setNoSlipBoundaryCondition, py::arg("side_type"))
-        .def("set_precursor_boundary_condition", &LevelGridBuilder::setPrecursorBoundaryCondition, py::arg("side_type"), py::arg("file_collection"), py::arg("n_t_read"), py::arg("velocity_x")=0.0f, py::arg("velocity_y")=0.0f, py::arg("velocity_z")=0.0f, py::arg("file_level_to_grid_level_map")=std::vector<uint>())
+        .def("set_precursor_boundary_condition", &LevelGridBuilder::setPrecursorBoundaryCondition, py::arg("side_type"), py::arg("file_collection"), py::arg("n_t_read"), py::arg("velocity_x")=c0o1, py::arg("velocity_y")=c0o1, py::arg("velocity_z")=c0o1, py::arg("file_level_to_grid_level_map")=std::vector<uint>())
         .def("set_stress_boundary_condition", &LevelGridBuilder::setStressBoundaryCondition, py::arg("side_type"), py::arg("normal_x"), py::arg("normal_y"), py::arg("normal_z"), py::arg("sampling_offset"), py::arg("z0"), py::arg("dx"));
 
         py::class_<MultipleGridBuilder, LevelGridBuilder, std::shared_ptr<MultipleGridBuilder>>(gridGeneratorModule, "MultipleGridBuilder")
diff --git a/pythonbindings/src/gpu/submodules/probes.cpp b/pythonbindings/src/gpu/submodules/probes.cpp
index 7c26958df81a60f00c9909a91f5576a5931652d4..9c3fc8ab15234013a093dbe00f6654ea3257d4b0 100644
--- a/pythonbindings/src/gpu/submodules/probes.cpp
+++ b/pythonbindings/src/gpu/submodules/probes.cpp
@@ -72,13 +72,16 @@ namespace probes
                         uint,
                         uint, 
                         uint,
-                        uint>(), 
+                        uint,
+                        bool>(), 
                         py::arg("probe_name"),
                         py::arg("output_path"),
                         py::arg("t_start_avg"),
                         py::arg("t_avg"),
                         py::arg("t_start_out"),
-                        py::arg("t_out"))
+                        py::arg("t_out"),
+                        py::arg("output_timeseries"))
+        .def("add_probe_point", &PointProbe::addProbePoint, py::arg("point_coord_x"), py::arg("point_coord_y"), py::arg("point_coord_z"))
         .def("add_probe_points_from_list", &PointProbe::addProbePointsFromList, py::arg("point_coords_x"), py::arg("point_coords_y"), py::arg("point_coords_z"))
         .def("add_probe_points_from_x_normal_plane", &PointProbe::addProbePointsFromXNormalPlane, py::arg("pos_x"), py::arg("pos0_y"), py::arg("pos0_z"), py::arg("pos1_y"), py::arg("pos1_z"), py::arg("n_y"), py::arg("n_z"));
 
diff --git a/setup.py b/setup.py
index 530431b3775970b5222bc87d32bfb407363f95d6..e40712e28ffb5ff72503b402478310179bf5e909 100644
--- a/setup.py
+++ b/setup.py
@@ -1,9 +1,3 @@
-import sys
-from pathlib import Path
-from typing import List
-
-import skbuild
-
 """
 Install python wrapper of Virtual Fluids
 install via python:
@@ -16,35 +10,32 @@ or install via pip:
     for pip>21:
         set CMAKE Flags via --config-settings "-DBUILD_VF_GPU=ON"
         example: pip install . --config-settings="-DBUILD_VF_GPU=ON"
-        each option has to be passed in individually i.e --config-settings="-DOPT1=ON" --config-settings="-DOPT2=OFF"
+        each option has to be passed in individually i.e
+        --config-settings="-DOPT1=ON" --config-settings="-DOPT2=OFF"
     for pip <21:
         set CMAKE Flags via --global-option ="-DBUILD_VF_GPU=ON"
         example: pip install . --global-option="-DBUILD_VF_GPU=ON"
 """
+import sys
+from pathlib import Path
+
+from setuptools import find_namespace_packages
+import skbuild
 
 package_name = "pyfluids"
 target = "python_bindings"
 src_dir = "pythonbindings"
-stub_package = package_name+"-stubs"
+stubs_package = package_name+"-stubs"
+stub_dir = Path(src_dir)/stubs_package
 
-stub_dir = Path(src_dir)/stub_package
 
+def find_stub_subpackages(stub_dir: Path):
+    return [str(d.parent.relative_to(stub_dir.parent)) for d in stub_dir.rglob("__init__.pyi")]
 
-def add_subfiles(dir_path: Path, suffix: str, root_dir: Path) -> List[str]:
-    files = []
-    for f in dir_path.iterdir():
-        if f.is_dir():
-            files.extend(add_subfiles(f, suffix, root_dir))
-        if f.is_file():
-            if f.suffix != suffix:
-                continue
-            files.append(str(f.relative_to(root_dir)))
-    return files
 
-def add_directory(dir_path: Path, suffix: str):
-    return add_subfiles(dir_path, suffix, dir_path)
+def find_stub_files(dir: Path):
+    return [str(f.relative_to(dir)) for f in dir.rglob("*.pyi")]
 
-stub_files = add_directory(stub_dir, ".pyi")
 
 # hack to get config-args for installation with pip>21
 cmake_args = []
@@ -62,11 +53,10 @@ cmake_args += [
 
 skbuild.setup(
     name=package_name,
-    packages=[package_name, "pymuparser", "pyfluids-stubs"],
+    packages=find_namespace_packages(where=src_dir)+find_stub_subpackages(stub_dir),
     package_dir={"": src_dir},
     cmake_args=cmake_args,
     cmake_install_target=target,
-    package_data={  "pyfluids": ["py.typed"],
-                    "pyfluids-stubs": stub_files},
+    package_data={package_name: ["py.typed"], stubs_package: find_stub_files(stub_dir)},
     include_package_data=True,
 )
diff --git a/src/gpu/GridGenerator/TransientBCSetter/TransientBCSetter.cpp b/src/gpu/GridGenerator/TransientBCSetter/TransientBCSetter.cpp
index 571796d503a1a73b3eccf631a347884c7522b533..15efa9d3204cc37d4c4ccd8778ea13769ef655b8 100644
--- a/src/gpu/GridGenerator/TransientBCSetter/TransientBCSetter.cpp
+++ b/src/gpu/GridGenerator/TransientBCSetter/TransientBCSetter.cpp
@@ -49,16 +49,16 @@ std::vector<T> readStringToVector(std::string s)
 
 std::string readElement(std::string line)
 {
-    size_t elemStart = line.find("<")+1;
+    const size_t elemStart = line.find('<')+1;
     // size_t elemEnd = line.find("/>", elemStart);
-    size_t nameLen = line.find(" ", elemStart)-elemStart;
+    const size_t nameLen = line.find(' ', elemStart)-elemStart;
     return line.substr(elemStart, nameLen);
 }
 
 std::string readAttribute(std::string line, std::string attributeName)
 {
-    size_t attributeStart = line.find(attributeName)+attributeName.size() + 2; // add 2 for '="'
-    size_t attributeLen = line.find("\"", attributeStart)-attributeStart;
+    const size_t attributeStart = line.find(attributeName)+attributeName.size() + 2; // add 2 for '="'
+    const size_t attributeLen = line.find("\"", attributeStart)-attributeStart;
     return line.substr(attributeStart, attributeLen);
 }
 
@@ -94,7 +94,7 @@ void VTKFile::readHeader()
     getline(file, line); // </ImageData
     getline(file, line); // AppendedData
 
-    int offset = int(file.tellg())+sizeof(char)+4; // skip underscore and bytesPerVal
+    const int offset = int(file.tellg())+sizeof(char)+4; // skip underscore and bytesPerVal
 
     for(auto& quantity: this->quantities)
     {
@@ -118,7 +118,7 @@ void VTKFile::readHeader()
 
 }
 
-bool VTKFile::markNANs(std::vector<uint> readIndices)
+bool VTKFile::markNANs(const std::vector<uint>& readIndices) const
 {
     std::ifstream buf(fileName.c_str(), std::ios::in | std::ios::binary);
 
@@ -126,7 +126,7 @@ bool VTKFile::markNANs(std::vector<uint> readIndices)
     tmp.reserve(readIndices.size());
     buf.seekg(this->quantities[0].offset);
     buf.read((char*) tmp.data(), sizeof(double)*readIndices.size());
-    auto firstNAN = std::find_if(tmp.begin(), tmp.end(), [](auto it){ return isnan(it); });
+    const auto firstNAN = std::find_if(tmp.begin(), tmp.end(), [](auto it){ return isnan(it); });
     
     return firstNAN != tmp.end();
 }
@@ -161,7 +161,7 @@ void VTKFile::getData(real *data, uint numberOfNodes, const std::vector<uint> &r
 {
     if(!this->loaded) loadFile();
 
-    size_t nPoints = writeIndices.size();
+    const size_t nPoints = writeIndices.size();
 
     for(size_t j=0; j<this->quantities.size(); j++)
     {
@@ -177,7 +177,7 @@ void VTKFile::printFileInfo()
 {
     printf("file %s with \n nx %i ny %i nz %i \n origin %f %f %f \n spacing %f %f %f \n", 
             fileName.c_str(), nx, ny, nz, minX, minY, minZ, deltaX, deltaY, deltaZ);
-    for(auto quantity: this->quantities)
+    for(const auto& quantity: this->quantities)
     {
         printf("\t quantity %s offset %i \n", quantity.name.c_str(), quantity.offset);
     }
@@ -199,8 +199,8 @@ void VTKFileCollection::findFiles()
             std::vector<VTKFile> filesWithThisId;
             while (!foundLastPart)
             {
-                std::string fname = makeFileName((int)files.size(), (int)filesOnThisLevel.size(), (int)filesWithThisId.size());
-                std::ifstream f(fname);
+                const std::string fname = makeFileName((int)files.size(), (int)filesOnThisLevel.size(), (int)filesWithThisId.size());
+                const std::ifstream f(fname);
                 if(f.good())
                     filesWithThisId.emplace_back(fname);
                 else
@@ -223,7 +223,7 @@ void VTKFileCollection::findFiles()
     }
 
     if(files.empty())
-        VF_LOG_CRITICAL("VTKFileCollection found no files!"); 
+        throw std::runtime_error("VTKFileCollection found no files!");
 }
     
 void TransientBCInputFileReader::getNeighbors(uint* neighbor0PP, uint* neighbor0PM, uint* neighbor0MP, uint* neighbor0MM)
@@ -260,8 +260,8 @@ void VTKReader::fillArrays(std::vector<real>& coordsY, std::vector<real>& coords
 {
     this->nPoints = (uint)coordsY.size();
     this->initializeIndexVectors();
-    real max_diff = 1e-4; // maximum distance between point on grid and precursor plane to count as exact match
-    real eps = 1e-7; // small number to avoid division by zero
+    const real max_diff = 1e-4; // maximum distance between point on grid and precursor plane to count as exact match
+    const real eps = 1e-7; // small number to avoid division by zero
     bool perfect_match = true;
 
     this->weights0PP.reserve(this->nPoints);
@@ -277,11 +277,11 @@ void VTKReader::fillArrays(std::vector<real>& coordsY, std::vector<real>& coords
     for(uint i=0; i<nPoints; i++)
     {
 
-        real posY = coordsY[i];
-        real posZ = coordsZ[i];
+        const real posY = coordsY[i];
+        const real posZ = coordsZ[i];
         bool found0PP = false, found0PM = false, found0MP = false, found0MM = false, foundAll = false;
 
-        uint level = this->readLevel;
+        const uint level = this->readLevel;
 
         for(int fileId=0; fileId<(int)this->fileCollection->files[level].size(); fileId++)
         {
@@ -290,7 +290,7 @@ void VTKReader::fillArrays(std::vector<real>& coordsY, std::vector<real>& coords
 
             // y in simulation is x in precursor/file, z in simulation is y in precursor/file 
             // simulation -> file: N -> E, S -> W, T -> N, B -> S
-            int idx = file.findNeighborMMM(posY, posZ, 0.f);                            //!> index of nearest WSB neighbor on precursor file
+            const int idx = file.findNeighborMMM(posY, posZ, 0.f);                            //!> index of nearest WSB neighbor on precursor file
             
             if(idx!=-1)
             {
@@ -301,7 +301,7 @@ void VTKReader::fillArrays(std::vector<real>& coordsY, std::vector<real>& coords
                     this->weights0PM.emplace_back(0.f);
                     this->weights0MP.emplace_back(0.f);
                     this->weights0MM.emplace_back(0.f);
-                    uint writeIdx = this->getWriteIndex(level, fileId, idx);            //!> writeIdx: index on host/device array where precursor value will be written to after loading from file
+                    const uint writeIdx = this->getWriteIndex(level, fileId, idx);            //!> writeIdx: index on host/device array where precursor value will be written to after loading from file
                     this->planeNeighbor0PP.push_back(writeIdx);                          //!> neighbor lists mapping where BC kernel should read from on host/device array
                     this->planeNeighbor0PM.push_back(writeIdx);
                     this->planeNeighbor0MP.push_back(writeIdx);
@@ -319,8 +319,8 @@ void VTKReader::fillArrays(std::vector<real>& coordsY, std::vector<real>& coords
                 if(!found0MM)
                 {
                     found0MM = true;
-                    real dy = file.getX(idx)-posY;
-                    real dz = file.getY(idx)-posZ;
+                    const real dy = file.getX(idx)-posY;
+                    const real dz = file.getY(idx)-posZ;
                     this->weights0MM.emplace_back(1.f/(dy*dy+dz*dz+eps));
                     this->planeNeighbor0MM.emplace_back(getWriteIndex(level, fileId, idx));
                 }
@@ -329,12 +329,12 @@ void VTKReader::fillArrays(std::vector<real>& coordsY, std::vector<real>& coords
             
             if(!found0PP) //NT in simulation is EN in precursor
             {
-                int index = file.findNeighborPPM(posY, posZ, 0.f);
+                const int index = file.findNeighborPPM(posY, posZ, 0.f);
                 if(index!=-1)
                 {
                     found0PP = true;
-                    real dy = file.getX(index)-posY;
-                    real dz = file.getY(index)-posZ;
+                    const real dy = file.getX(index)-posY;
+                    const real dz = file.getY(index)-posZ;
                     this->weights0PP.emplace_back(1.f/(dy*dy+dz*dz+eps));
                     this->planeNeighbor0PP.emplace_back(getWriteIndex(level, fileId, index));
                 }
@@ -342,12 +342,12 @@ void VTKReader::fillArrays(std::vector<real>& coordsY, std::vector<real>& coords
 
             if(!found0PM) //NB in simulation is ES in precursor
             {
-                int index = file.findNeighborPMM(posY, posZ, 0.f);
+                const int index = file.findNeighborPMM(posY, posZ, 0.f);
                 if(index!=-1)
                 {
                     found0PM = true;
-                    real dy = file.getX(index)-posY;
-                    real dz = file.getY(index)-posZ;
+                    const real dy = file.getX(index)-posY;
+                    const real dz = file.getY(index)-posZ;
                     this->weights0PM.emplace_back(1.f/(dy*dy+dz*dz+eps));
                     this->planeNeighbor0PP.emplace_back(getWriteIndex(level, fileId, index));
                 }
@@ -355,12 +355,12 @@ void VTKReader::fillArrays(std::vector<real>& coordsY, std::vector<real>& coords
 
             if(!found0MP) //ST in simulation is WN in precursor
             {
-                int index = file.findNeighborMPM(posY, posZ, 0.f);
+                const int index = file.findNeighborMPM(posY, posZ, 0.f);
                 if(index!=-1)
                 {
                     found0MP = true;
-                    real dy = file.getX(index)-posY;
-                    real dz = file.getY(index)-posZ;
+                    const real dy = file.getX(index)-posY;
+                    const real dz = file.getY(index)-posZ;
                     this->weights0MP.emplace_back(1.f/(dy*dy+dz*dz+eps));
                     this->planeNeighbor0MP.emplace_back(getWriteIndex(level, fileId, index));
                 }
@@ -391,8 +391,8 @@ void VTKReader::fillArrays(std::vector<real>& coordsY, std::vector<real>& coords
 
 uint VTKReader::getWriteIndex(int level, int id, int linearIndex)
 {
-    auto it = std::find(this->writeIndices[level][id].begin(), this->writeIndices[level][id].end(), linearIndex);
-    uint idx = it-this->writeIndices[level][id].begin();
+    const auto it = std::find(this->writeIndices[level][id].begin(), this->writeIndices[level][id].end(), linearIndex);
+    const uint idx = it-this->writeIndices[level][id].begin();
     if(it==this->writeIndices[level][id].end())                         
     {
         this->writeIndices[level][id].push_back(this->nPointsRead);     //!> index on host/device array where value from file will be written to
@@ -407,17 +407,16 @@ void VTKReader::getNextData(real* data, uint numberOfNodes, real time)
 {
     // for(size_t level=0; level<this->fileCollection->files.size(); level++)
     // {
-        uint level = this->readLevel;
+        const uint level = this->readLevel;
         for(size_t id=0; id<this->fileCollection->files[level].size(); id++)
         {
             size_t numberOfFiles = this->nFile[level][id];
 
-
             if(!this->fileCollection->files[level][id][numberOfFiles].inZBounds(time))
             {
                 numberOfFiles++;
 
-                VF_LOG_INFO("PrecursorBC on level {}: switching to file no. {}\n", level, numberOfFiles);
+                VF_LOG_INFO("PrecursorBC on level {}: switching to file no. {}", level, numberOfFiles);
                 if(numberOfFiles == this->fileCollection->files[level][id].size())
                     throw std::runtime_error("Not enough Precursor Files to read");
 
@@ -433,10 +432,9 @@ void VTKReader::getNextData(real* data, uint numberOfNodes, real time)
                 }
             }
         
-
             VTKFile* file = &this->fileCollection->files[level][id][numberOfFiles];
 
-            int off = file->getClosestIdxZ(time)*file->getNumberOfPointsInXYPlane();
+            const int off = file->getClosestIdxZ(time)*file->getNumberOfPointsInXYPlane();
             file->getData(data, numberOfNodes, this->readIndices[level][id], this->writeIndices[level][id], off, this->writingOffset);
             this->nFile[level][id] = numberOfFiles;
         }
diff --git a/src/gpu/GridGenerator/TransientBCSetter/TransientBCSetter.h b/src/gpu/GridGenerator/TransientBCSetter/TransientBCSetter.h
index bdf29745a0a60473d0454c33dcb10a193ca10780..5b9ea91a831da4d9bf189418e0e3eaaf8abd7357 100644
--- a/src/gpu/GridGenerator/TransientBCSetter/TransientBCSetter.h
+++ b/src/gpu/GridGenerator/TransientBCSetter/TransientBCSetter.h
@@ -41,7 +41,7 @@ public:
     };
 
     void getData(real* data, uint numberOfNodes, const std::vector<uint>& readIndices, const std::vector<uint>& writeIndices, uint offsetRead, uint offsetWrite);
-    bool markNANs(std::vector<uint> readIndices);
+    bool markNANs(const std::vector<uint>& readIndices) const;
     bool inBoundingBox(real posX, real posY, real posZ){return  inXBounds(posX) && inYBounds(posY) && inZBounds(posZ); };
     bool inXBounds(real posX){ return posX<=maxX && posX>=minX; };
     bool inYBounds(real posY){ return posY<=maxY && posY>=minY; };
@@ -183,7 +183,7 @@ public:
     void getNextData(real* data, uint numberOfNodes, real time) override;
     void fillArrays(std::vector<real>& coordsY, std::vector<real>& coordsZ) override;
 private:  
-    uint getWriteIndex(int level, int id, int linearIdx);
+    uint getWriteIndex(int level, int id, int linearIndex);
     void initializeIndexVectors();
 
 private:
diff --git a/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridProvider.cpp b/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridProvider.cpp
index e7a9e5bb6b5a5b0fa24d7ff7da5e3318891ea48d..9b2d1c4f5fa742b46ecd9ad3a9f8e86b499909fb 100644
--- a/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridProvider.cpp
+++ b/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridProvider.cpp
@@ -35,7 +35,7 @@ void GridProvider::setNumberOfTaggedFluidNodes(uint numberOfNodes, CollisionTemp
     para->getParD(level)->numberOfTaggedFluidNodes[tag] = numberOfNodes;
 }
 
-void GridProvider::setInitalNodeValues(uint numberOfNodes, int level) const
+void GridProvider::setInitialNodeValues(uint numberOfNodes, int level) const
 {
     for (uint pos = 1; pos <= numberOfNodes; pos++)
     {
diff --git a/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridProvider.h b/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridProvider.h
index 079843371208891cc2ef6ae206f53a6f57678a1a..ee6c93a5f718a2e6907e178bf7b751fbaed824dd 100644
--- a/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridProvider.h
+++ b/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridProvider.h
@@ -48,7 +48,7 @@ public:
 protected:
     void setNumberOfNodes(uint numberOfNodes, int level) const;
     void setNumberOfTaggedFluidNodes(uint numberOfNodes, CollisionTemplate tag, int level) const;
-    virtual void setInitalNodeValues(uint numberOfNodes, int level) const;
+    virtual void setInitialNodeValues(uint numberOfNodes, int level) const;
 
     void setPressSizePerLevel(int level, int sizePerLevel) const;
     void setVelocitySizePerLevel(int level, int sizePerLevel) const;
diff --git a/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderFiles/GridReader.cpp b/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderFiles/GridReader.cpp
index c126b2f79e02272a0bd86bfe0f76fe5efe09a5a7..000e97e9a14e51d140dfa30cb325d2a04cd83f50 100644
--- a/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderFiles/GridReader.cpp
+++ b/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderFiles/GridReader.cpp
@@ -94,7 +94,7 @@ void GridReader::allocArrays_CoordNeighborGeo()
         neighWSB->initalNeighbors(para->getParH(level)->neighborInverse, level);
         geoV.initalNeighbors(     para->getParH(level)->typeOfGridNode,          level);
         rearrangeGeometry(para.get(), level);
-		setInitalNodeValues(numberOfNodesPerLevel, level);
+		setInitialNodeValues(numberOfNodesPerLevel, level);
 
         cudaMemoryManager->cudaCopyNeighborWSB(level);
         cudaMemoryManager->cudaCopySP(level);
diff --git a/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderGenerator/GridGenerator.cpp b/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderGenerator/GridGenerator.cpp
index 8984c15ebcc44598a30a3864945198ab0c5fde07..e3c86317c3bf7e4ece5720ac8117e5f418b22fa4 100644
--- a/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderGenerator/GridGenerator.cpp
+++ b/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderGenerator/GridGenerator.cpp
@@ -4,6 +4,7 @@
 #include "Parameter/Parameter.h"
 #include "GridGenerator/grid/GridBuilder/GridBuilder.h"
 #include "GPU/CudaMemoryManager.h"
+#include "Parameter/CudaStreamManager.h"
 #include "IndexRearrangementForStreams.h"
 #include "InterpolationCellGrouper.h"
 
@@ -92,7 +93,7 @@ void GridGenerator::allocArrays_CoordNeighborGeo()
             para->getParH(level)->typeOfGridNode,
             level);
 
-        setInitalNodeValues(numberOfNodesPerLevel, level);
+        setInitialNodeValues(numberOfNodesPerLevel, level);
 
         cudaMemoryManager->cudaCopyNeighborWSB(level);
         cudaMemoryManager->cudaCopySP(level);
@@ -364,6 +365,8 @@ void GridGenerator::allocArrays_BoundaryValues()
 
             cudaMemoryManager->cudaCopyPrecursorBC(level);
             cudaMemoryManager->cudaAllocPrecursorData(level);
+            para->getParD(level)->precursorBC.streamIndex = para->getStreamManager()->registerAndLaunchStream(CudaStreamIndex::Precursor);
+            
 
             // read first timestep of precursor into next and copy to next on device
             for(auto reader : para->getParH(level)->transientBCInputFileReader)
diff --git a/src/gpu/VirtualFluids_GPU/GPU/CudaMemoryManager.cpp b/src/gpu/VirtualFluids_GPU/GPU/CudaMemoryManager.cpp
index 14e090ff3f02eaec87a9f709fc0e0ac8df711189..64943d19ff54bfaa174d0728a96c517f6605d565 100644
--- a/src/gpu/VirtualFluids_GPU/GPU/CudaMemoryManager.cpp
+++ b/src/gpu/VirtualFluids_GPU/GPU/CudaMemoryManager.cpp
@@ -1740,11 +1740,11 @@ void CudaMemoryManager::cudaCopyPrecursorBC(int lev)
 }
 void CudaMemoryManager::cudaCopyPrecursorData(int lev)
 {
-    auto prec = &parameter->getParH(lev)->precursorBC;
-    auto precStream = parameter->getStreamManager()->getStream(CudaStreamIndex::Precursor);
-    size_t memSize = prec->numberOfPrecursorNodes*sizeof(real)*prec->numberOfQuantities;
-    checkCudaErrors( cudaStreamSynchronize(precStream) );
-    checkCudaErrors( cudaMemcpyAsync(parameter->getParD(lev)->precursorBC.next, prec->next, memSize, cudaMemcpyHostToDevice, precStream) );
+    auto precurser = &parameter->getParH(lev)->precursorBC;
+    auto precurserStream = parameter->getStreamManager()->getStream(CudaStreamIndex::Precursor, precurser->streamIndex);
+    size_t memSize = precurser->numberOfPrecursorNodes*sizeof(real)*precurser->numberOfQuantities;
+    checkCudaErrors( cudaStreamSynchronize(precurserStream) );
+    checkCudaErrors( cudaMemcpyAsync(parameter->getParD(lev)->precursorBC.next, precurser->next, memSize, cudaMemcpyHostToDevice, precurserStream) );
 }
 
 
@@ -3399,14 +3399,17 @@ void CudaMemoryManager::cudaAllocProbeIndices(Probe* probe, int level)
     checkCudaErrors( cudaMalloc    ((void**) &probe->getProbeStruct(level)->pointIndicesD, tmp) );
     setMemsizeGPU(1.f*tmp, false);
 }
+
 void CudaMemoryManager::cudaCopyProbeIndicesHtoD(Probe* probe, int level)
 {
     checkCudaErrors( cudaMemcpy(probe->getProbeStruct(level)->pointIndicesD, probe->getProbeStruct(level)->pointIndicesH, sizeof(int)*probe->getProbeStruct(level)->nIndices, cudaMemcpyHostToDevice) );
 }
+
 void CudaMemoryManager::cudaCopyProbeIndicesDtoH(Probe* probe, int level)
 {
     checkCudaErrors( cudaMemcpy(probe->getProbeStruct(level)->pointIndicesH, probe->getProbeStruct(level)->pointIndicesD, sizeof(int)*probe->getProbeStruct(level)->nIndices, cudaMemcpyDeviceToHost) );
 }
+
 void CudaMemoryManager::cudaFreeProbeIndices(Probe* probe, int level)
 {
     checkCudaErrors( cudaFreeHost(probe->getProbeStruct(level)->pointIndicesH) );
@@ -3415,25 +3418,28 @@ void CudaMemoryManager::cudaFreeProbeIndices(Probe* probe, int level)
 
 void CudaMemoryManager::cudaAllocProbeQuantityArray(Probe* probe, int level)
 {
-    size_t tmp = sizeof(real)*probe->getProbeStruct(level)->nArrays*probe->getProbeStruct(level)->nPoints;
+    auto probeStruct = probe->getProbeStruct(level);
+    size_t tmp = sizeof(real)*probeStruct->nArrays*probeStruct->nPoints*probeStruct->nTimesteps;
 
-    checkCudaErrors( cudaMallocHost((void**) &probe->getProbeStruct(level)->quantitiesArrayH, tmp) );
+    checkCudaErrors( cudaMallocHost((void**) &probeStruct->quantitiesArrayH, tmp) );
     if(probe->getHasDeviceQuantityArray())
     {
-        checkCudaErrors( cudaMalloc    ((void**) &probe->getProbeStruct(level)->quantitiesArrayD, tmp) );
+        checkCudaErrors( cudaMalloc    ((void**) &probeStruct->quantitiesArrayD, tmp) );
         setMemsizeGPU(1.f*tmp, false);
     }
 }
 
 void CudaMemoryManager::cudaCopyProbeQuantityArrayHtoD(Probe* probe, int level)
 {
-    checkCudaErrors( cudaMemcpy(probe->getProbeStruct(level)->quantitiesArrayD, probe->getProbeStruct(level)->quantitiesArrayH, probe->getProbeStruct(level)->nArrays*sizeof(real)*probe->getProbeStruct(level)->nPoints, cudaMemcpyHostToDevice) );
+    auto probeStruct = probe->getProbeStruct(level);
+    size_t tmp = sizeof(real)*probeStruct->nArrays*probeStruct->nPoints*probeStruct->nTimesteps;
+    checkCudaErrors( cudaMemcpy(probeStruct->quantitiesArrayD, probeStruct->quantitiesArrayH, tmp, cudaMemcpyHostToDevice) );
 }
 void CudaMemoryManager::cudaCopyProbeQuantityArrayDtoH(Probe* probe, int level)
 {
     auto probeStruct = probe->getProbeStruct(level);
-
-    checkCudaErrors( cudaMemcpy(probeStruct->quantitiesArrayH, probeStruct->quantitiesArrayD, probeStruct->nArrays*sizeof(real)*probeStruct->nPoints, cudaMemcpyDeviceToHost) );
+    size_t tmp = sizeof(real)*probeStruct->nArrays*probeStruct->nPoints*probeStruct->nTimesteps;
+    checkCudaErrors( cudaMemcpy(probeStruct->quantitiesArrayH, probeStruct->quantitiesArrayD, tmp, cudaMemcpyDeviceToHost) );
 }
 
 void CudaMemoryManager::cudaFreeProbeQuantityArray(Probe* probe, int level)
diff --git a/src/gpu/VirtualFluids_GPU/KernelManager/BCKernelManager.cpp b/src/gpu/VirtualFluids_GPU/KernelManager/BCKernelManager.cpp
index e8fc3f318c920be36be7861a28659124a7b1e977..1c1e65fa34720a1f9d19b5ef02e4afda167b323f 100644
--- a/src/gpu/VirtualFluids_GPU/KernelManager/BCKernelManager.cpp
+++ b/src/gpu/VirtualFluids_GPU/KernelManager/BCKernelManager.cpp
@@ -415,7 +415,7 @@ void BCKernelManager::runPrecursorBCKernelPost(int level, uint t, CudaMemoryMana
         para->getParD(level)->precursorBC.current = para->getParD(level)->precursorBC.next;
         para->getParD(level)->precursorBC.next = tmp;
 
-        real loadTime = nextTime*pow(2,-level)*para->getTimeRatio();
+        real loadTime = nextTime*exp2(-level)*para->getTimeRatio();
 
         for(auto reader : para->getParH(level)->transientBCInputFileReader)
         {   
diff --git a/src/gpu/VirtualFluids_GPU/LBM/LB.h b/src/gpu/VirtualFluids_GPU/LBM/LB.h
index a5ae5f5ceef213e8ec9b2306106035a09b1ffd0d..ead35be5e8b60534105b84badb2e96c98e9adfc5 100644
--- a/src/gpu/VirtualFluids_GPU/LBM/LB.h
+++ b/src/gpu/VirtualFluids_GPU/LBM/LB.h
@@ -167,6 +167,7 @@ typedef struct QforPrecursorBC{
    int numberOfBCnodes=0;
    int sizeQ;
    int numberOfPrecursorNodes=0;
+   uint streamIndex=0;
    uint nPrecursorReads=0;
    uint timeStepsBetweenReads;
    size_t numberOfQuantities;
diff --git a/src/gpu/VirtualFluids_GPU/LBM/Simulation.cpp b/src/gpu/VirtualFluids_GPU/LBM/Simulation.cpp
index 55c0250223901a94d03bd1e65bbb72438bcc99c3..dddc795ccda96b49a03373c05a701f32662e9565 100644
--- a/src/gpu/VirtualFluids_GPU/LBM/Simulation.cpp
+++ b/src/gpu/VirtualFluids_GPU/LBM/Simulation.cpp
@@ -139,9 +139,8 @@ void Simulation::init(GridProvider &gridProvider, BoundaryConditionFactory *bcFa
     //////////////////////////////////////////////////////////////////////////
     // CUDA streams
     if (para->getUseStreams()) {
-        para->getStreamManager()->registerStream(CudaStreamIndex::SubDomainBorder);
-        para->getStreamManager()->registerStream(CudaStreamIndex::Bulk);
-        para->getStreamManager()->launchStreams();
+        para->getStreamManager()->registerAndLaunchStream(CudaStreamIndex::SubDomainBorder);
+        para->getStreamManager()->registerAndLaunchStream(CudaStreamIndex::Bulk);
         para->getStreamManager()->createCudaEvents();
     }
     //////////////////////////////////////////////////////////////////////////
diff --git a/src/gpu/VirtualFluids_GPU/Output/FileWriter.cpp b/src/gpu/VirtualFluids_GPU/Output/FileWriter.cpp
index cb8cefa389c141b7f38bbc54a68d8cf9841ba699..e28c802429c1703314c59402e2d4d490388b8ce1 100644
--- a/src/gpu/VirtualFluids_GPU/Output/FileWriter.cpp
+++ b/src/gpu/VirtualFluids_GPU/Output/FileWriter.cpp
@@ -8,7 +8,7 @@
 #include "FileWriter.h"
 #include <logger/Logger.h>
 
-#include <stdio.h>
+#include <cstdio>
 #include <fstream>
 #include <sstream>
 #include <cmath>
@@ -23,6 +23,37 @@
 
 #include <basics/writer/WbWriterVtkXmlBinary.h>
 
+std::string makePartFileNameEnding(int level, int ID, int part, int timestep)
+{
+    return "_lev_" + StringUtil::toString<int>(level) + "_ID_" + StringUtil::toString<int>(ID) + "_Part_" + StringUtil::toString<int>(part) + "_t_" + StringUtil::toString<int>(timestep) + ".vtk";
+}
+
+std::string makeCollectionFileNameEnding(int ID, int timestep)
+{
+    return "_ID_" + StringUtil::toString<int>(ID) + "_t_" + StringUtil::toString<int>(timestep) + ".vtk";
+}
+
+std::string makePartFileName(const std::string &prefix, int level, int ID, int part, int timestep)
+{
+    return prefix + "_bin" + makePartFileNameEnding(level, ID, part, timestep);
+}
+
+std::string makeMedianPartFileName(const std::string &prefix, int level, int ID, int part, int timestep)
+{
+    return prefix + "_bin_median" + makePartFileNameEnding(level, ID, part, timestep);
+}
+
+
+std::string makeCollectionFileName(const std::string &prefix, int ID, int timestep)
+{
+    return prefix + "_bin" + makeCollectionFileNameEnding(ID, timestep);
+}
+
+std::string makeMedianCollectionFileName(const std::string &prefix, int ID, int timestep)
+{
+    return prefix + "_bin_median" + makeCollectionFileNameEnding(ID, timestep);
+}
+
 void FileWriter::writeInit(std::shared_ptr<Parameter> para, std::shared_ptr<CudaMemoryManager> cudaMemoryManager)
 {
     unsigned int timestep = para->getTimestepInit();
@@ -50,30 +81,29 @@ void FileWriter::writeTimestep(std::shared_ptr<Parameter> para, unsigned int tim
 
 void FileWriter::writeTimestep(std::shared_ptr<Parameter> para, unsigned int timestep, int level)
 {
-    const unsigned int numberOfParts = (uint)para->getParH(level)->numberOfNodes / para->getlimitOfNodesForVTK() + 1;
-    std::vector<std::string> fname;
-    std::vector<std::string> fnameMed;
+    const unsigned int numberOfParts = (uint)para->getParH(level)->numberOfNodes / para->getLimitOfNodesForVTK() + 1;
+    std::vector<std::string> fnames;
+    std::vector<std::string> fnamesMed;
 
     for (unsigned int i = 1; i <= numberOfParts; i++)
     {
-        fname.push_back(para->getFName() + "_bin_lev_" + StringUtil::toString<int>(level) + "_ID_" + StringUtil::toString<int>(para->getMyProcessID()) + "_Part_" + StringUtil::toString<int>(i) + "_t_" + StringUtil::toString<int>(timestep) + ".vtk");
-        fnameMed.push_back(para->getFName() + "_bin_median_lev_" + StringUtil::toString<int>(level) + "_ID_" + StringUtil::toString<int>(para->getMyProcessID()) + "_Part_" + StringUtil::toString<int>(i) + "_t_" + StringUtil::toString<int>(timestep) + ".vtk");
+        std::string fname = makePartFileName(para->getFName(), level, para->getMyProcessID(), i, timestep); 
+        std::string fnameMed = makeMedianPartFileName(para->getFName(), level, para->getMyProcessID(), i, timestep); 
 
-        this->fileNamesForCollectionFile.push_back( fname.back() );
-        this->fileNamesForCollectionFileMedian.push_back( fnameMed.back() );
+        fnames.push_back(fname);
+        fnamesMed.push_back(fnameMed);
     }
 
-    if (para->getDiffOn() == true)
-        writeUnstrucuredGridLTConc(para, level, fname);
-    else
-        writeUnstrucuredGridLT(para, level, fname);
+    std::vector<std::string> fnamesLong = writeUnstructuredGridLT(para, level, fnames);
+    for(auto fname : fnamesLong)
+        this->fileNamesForCollectionFile.push_back(fname.substr( fname.find_last_of('/') + 1 ));
+
 
     if (para->getCalcMedian())
     {
-        if (para->getDiffOn() == true)
-            writeUnstrucuredGridMedianLTConc(para, level, fnameMed);
-        else
-            writeUnstrucuredGridMedianLT(para, level, fnameMed);
+        std::vector<std::string> fnamesMedianLong = writeUnstructuredGridMedianLT(para, level, fnamesMed);
+        for(auto fname : fnamesMedianLong)
+            this->fileNamesForCollectionFileMedian.push_back(fname.substr( fname.find_last_of('/') + 1 ));
     }
 }
 
@@ -84,194 +114,169 @@ bool FileWriter::isPeriodicCell(std::shared_ptr<Parameter> para, int level, unsi
            (para->getParH(level)->coordinateZ[number5] < para->getParH(level)->coordinateZ[number1]);
 }
 
-void VIRTUALFLUIDS_GPU_EXPORT FileWriter::writeCollectionFile(std::shared_ptr<Parameter> para, unsigned int timestep)
+std::vector<std::string> FileWriter::getNodeDataNames(std::shared_ptr<Parameter> para)
 {
 
-    std::string filename = para->getFName() + "_bin_ID_" + StringUtil::toString<int>(para->getMyProcessID()) + "_t_" + StringUtil::toString<int>(timestep) + ".vtk";
-
-    std::ofstream file;
-
-    file.open( filename + ".pvtu" );
-
-    //////////////////////////////////////////////////////////////////////////
-    file << "<?xml version=\"1.0\"?>" << std::endl;
-    file << "<VTKFile type=\"PUnstructuredGrid\" version=\"0.1\" byte_order=\"LittleEndian\">" << std::endl;
-    file << "  <PUnstructuredGrid GhostLevel=\"1\">" << std::endl;
+    std::vector<std::string> nodeDataNames;
+    nodeDataNames.push_back("press");
+    nodeDataNames.push_back("rho");
+    nodeDataNames.push_back("vx1");
+    nodeDataNames.push_back("vx2");
+    nodeDataNames.push_back("vx3");
+    nodeDataNames.push_back("geo");
 
-    file << "    <PPointData>" << std::endl;
-    file << "       <PDataArray type=\"Float64\" Name=\"press\" /> " << std::endl;
-    file << "       <PDataArray type=\"Float64\" Name=\"rho\"   /> " << std::endl;
-    file << "       <PDataArray type=\"Float64\" Name=\"vx1\"   /> " << std::endl;
-    file << "       <PDataArray type=\"Float64\" Name=\"vx2\"   /> " << std::endl;
-    file << "       <PDataArray type=\"Float64\" Name=\"vx3\"   /> " << std::endl;
-    file << "       <PDataArray type=\"Float64\" Name=\"geo\"   /> " << std::endl;
-    if( para->getDiffOn() ) file << "       <PDataArray type=\"Float64\" Name=\"conc\"  /> " << std::endl;
-    file << "    </PPointData>" << std::endl;
+    if(para->getDiffOn()) 
+        nodeDataNames.push_back("conc");
 
-    file << "    <PPoints>" << std::endl;
-    file << "      <PDataArray type=\"Float32\" Name=\"Points\" NumberOfComponents=\"3\"/>" << std::endl;
-    file << "    </PPoints>" << std::endl;
-
-    for( auto& fname : this->fileNamesForCollectionFile )
+    if(para->getIsBodyForce())
     {
-        const auto filenameWithoutPath=fname.substr( fname.find_last_of('/') + 1 );
-        file << "    <Piece Source=\"" << filenameWithoutPath << ".bin.vtu\"/>" << std::endl;
+        nodeDataNames.push_back("Fx");
+        nodeDataNames.push_back("Fy");
+        nodeDataNames.push_back("Fz");
     }
 
-    file << "  </PUnstructuredGrid>" << std::endl;
-    file << "</VTKFile>" << std::endl;
+    if(para->getUseTurbulentViscosity())
+    {
+        nodeDataNames.push_back("nut");
+    }
 
-    //////////////////////////////////////////////////////////////////////////
+    if (para->getCalcTurbulenceIntensity()) {
+        nodeDataNames.push_back("vxx");
+        nodeDataNames.push_back("vyy");
+        nodeDataNames.push_back("vzz");
+        nodeDataNames.push_back("vxy");
+        nodeDataNames.push_back("vxz");
+        nodeDataNames.push_back("vyz");
+    }
+    return nodeDataNames;
+}
 
-    file.close();
+std::vector<std::string> FileWriter::getMedianNodeDataNames(std::shared_ptr<Parameter> para)
+{
+    std::vector<std::string> nodeDataNames;
+    
+    if(para->getDiffOn()) 
+        nodeDataNames.push_back("conc");
+    nodeDataNames.push_back("pressMed");
+    nodeDataNames.push_back("rhoMed");
+    nodeDataNames.push_back("vx1Med");
+    nodeDataNames.push_back("vx2Med");
+    nodeDataNames.push_back("vx3Med");
+    nodeDataNames.push_back("geo");
+
+    return nodeDataNames;
+}
 
+std::string VIRTUALFLUIDS_GPU_EXPORT FileWriter::writeCollectionFile(std::shared_ptr<Parameter> para, unsigned int timestep)
+{
+    std::string filename = makeCollectionFileName(para->getFName(), para->getMyProcessID(), timestep);
+    auto nodeDataNames = this->getNodeDataNames(para);
+    std::vector<std::string> cellDataNames;
+    std::string pFileName= WbWriterVtkXmlBinary::getInstance()->writeParallelFile(filename, this->fileNamesForCollectionFile, nodeDataNames, cellDataNames);
     this->fileNamesForCollectionFile.clear();
+    return pFileName;
 }
 
-void VIRTUALFLUIDS_GPU_EXPORT FileWriter::writeCollectionFileMedian(std::shared_ptr<Parameter> para, unsigned int timestep)
+std::string VIRTUALFLUIDS_GPU_EXPORT FileWriter::writeCollectionFileMedian(std::shared_ptr<Parameter> para, unsigned int timestep)
 {
-
-    std::string filename = para->getFName() + "_bin_median_ID_" + StringUtil::toString<int>(para->getMyProcessID()) + "_t_" + StringUtil::toString<int>(timestep) + ".vtk";
-
-    std::ofstream file;
-
-    file.open( filename + ".pvtu" );
-
-    //////////////////////////////////////////////////////////////////////////
-
-    file << "<VTKFile type=\"PUnstructuredGrid\" version=\"1.0\" byte_order=\"LittleEndian\" header_type=\"UInt64\">" << std::endl;
-    file << "  <PUnstructuredGrid GhostLevel=\"1\">" << std::endl;
-
-    file << "    <PPointData>" << std::endl;
-    if( para->getDiffOn() ) file << "       <DataArray type=\"Float32\" Name=\"concMed\"  /> " << std::endl;
-    file << "       <DataArray type=\"Float32\" Name=\"pressMed\" /> " << std::endl;
-    file << "       <DataArray type=\"Float32\" Name=\"rhoMed\"   /> " << std::endl;
-    file << "       <DataArray type=\"Float32\" Name=\"vx1Med\"   /> " << std::endl;
-    file << "       <DataArray type=\"Float32\" Name=\"vx2Med\"   /> " << std::endl;
-    file << "       <DataArray type=\"Float32\" Name=\"vx3Med\"   /> " << std::endl;
-    file << "       <DataArray type=\"Float32\" Name=\"geo\"   /> " << std::endl;
-    file << "    </PPointData>" << std::endl;
-
-    file << "    <PPoints>" << std::endl;
-    file << "      <PDataArray type=\"Float32\" Name=\"Points\" NumberOfComponents=\"3\"/>" << std::endl;
-    file << "    </PPoints>" << std::endl;
-
-    for( auto& fname : this->fileNamesForCollectionFileMedian )
-    {
-        const auto filenameWithoutPath=fname.substr( fname.find_last_of('/') + 1 );
-        file << "    <Piece Source=\"" << filenameWithoutPath << ".bin.vtu\"/>" << std::endl;
-    }
-
-    file << "  </PUnstructuredGrid>" << std::endl;
-    file << "</VTKFile>" << std::endl;
-
-    //////////////////////////////////////////////////////////////////////////
-
-    file.close();
-
+    std::string filename = makeMedianCollectionFileName(para->getFName(), para->getMyProcessID(), timestep);
+    std::vector<std::string> nodeDataNames = getMedianNodeDataNames(para);
+    std::vector<std::string> cellDataNames;
+    std::string pFileName =  WbWriterVtkXmlBinary::getInstance()->writeParallelFile(filename, this->fileNamesForCollectionFileMedian, nodeDataNames, cellDataNames);
     this->fileNamesForCollectionFileMedian.clear();
+    return pFileName;
 }
 
-void FileWriter::writeUnstrucuredGridLT(std::shared_ptr<Parameter> para, int level, std::vector<std::string >& fname)
+std::vector<std::string> FileWriter::writeUnstructuredGridLT(std::shared_ptr<Parameter> para, int level, std::vector<std::string >& fname)
 {
     std::vector< UbTupleFloat3 > nodes;
     std::vector< UbTupleUInt8 > cells;
-    std::vector< std::string > nodedatanames;
-    nodedatanames.push_back("press");
-    nodedatanames.push_back("rho");
-    nodedatanames.push_back("vx1");
-    nodedatanames.push_back("vx2");
-    nodedatanames.push_back("vx3");
-    nodedatanames.push_back("geo");
+    std::vector< std::string > nodeDataNames = getNodeDataNames(para);
+
+    std::vector< std::string > outFNames;
+
+    uint dataIndex = 6;
+
+    uint firstConcNode = dataIndex;
+    if(para->getDiffOn()) dataIndex++;
     
-    uint firstBodyForceNode = (uint) nodedatanames.size();
-    if(para->getIsBodyForce())
-    {
-        nodedatanames.push_back("Fx");
-        nodedatanames.push_back("Fy");
-        nodedatanames.push_back("Fz");
-    }
+    uint firstBodyForceNode = dataIndex;
+    if(para->getIsBodyForce()) dataIndex+=3;
 
-    uint firstNutNode = (uint) nodedatanames.size();
-    if(para->getUseTurbulentViscosity())
-    {
-        nodedatanames.push_back("nut");
-    }
+    uint firstNutNode = dataIndex;
+    if(para->getUseTurbulentViscosity()) dataIndex++;
 
-    uint firstTurbNode = (uint) nodedatanames.size();
-    if (para->getCalcTurbulenceIntensity()) {
-        nodedatanames.push_back("vxx");
-        nodedatanames.push_back("vyy");
-        nodedatanames.push_back("vzz");
-        nodedatanames.push_back("vxy");
-        nodedatanames.push_back("vxz");
-        nodedatanames.push_back("vyz");
-    }
+    uint firstTurbulenceNode = dataIndex;
+    if (para->getCalcTurbulenceIntensity()) dataIndex += 6;
     unsigned int number1, number2, number3, number4, number5, number6, number7, number8;
     uint dn1, dn2, dn3, dn4, dn5, dn6, dn7, dn8;
     bool neighborsAreFluid;
-    unsigned int startpos = 0;
-    unsigned int endpos = 0;
-    unsigned int sizeOfNodes = 0;
-    std::vector< std::vector< double > > nodedata(nodedatanames.size());
+    unsigned int startPosition;
+    unsigned int endPosition;
+    unsigned int sizeOfNodes;
+    std::vector< std::vector< double > > nodeData(nodeDataNames.size());
 
     for (unsigned int part = 0; part < fname.size(); part++)
     {
-        if (((part + 1)*para->getlimitOfNodesForVTK()) > (uint)para->getParH(level)->numberOfNodes)
-            sizeOfNodes = (uint)para->getParH(level)->numberOfNodes - (part * para->getlimitOfNodesForVTK());
+        if (((part + 1)*para->getLimitOfNodesForVTK()) > (uint)para->getParH(level)->numberOfNodes)
+            sizeOfNodes = (uint)para->getParH(level)->numberOfNodes - (part * para->getLimitOfNodesForVTK());
         else
-            sizeOfNodes = para->getlimitOfNodesForVTK();
+            sizeOfNodes = para->getLimitOfNodesForVTK();
 
         //////////////////////////////////////////////////////////////////////////
-        startpos = part * para->getlimitOfNodesForVTK();
-        endpos = startpos + sizeOfNodes;
+        startPosition = part * para->getLimitOfNodesForVTK();
+        endPosition = startPosition + sizeOfNodes;
         //////////////////////////////////////////////////////////////////////////
         cells.clear();
         nodes.resize(sizeOfNodes);
-        for (uint i = 0; i < (uint)nodedatanames.size(); i++)
-            nodedata[i].resize(sizeOfNodes);
+        for (uint i = 0; i < (uint)nodeDataNames.size(); i++)
+            nodeData[i].resize(sizeOfNodes);
 
         //////////////////////////////////////////////////////////////////////////
-        for (unsigned int pos = startpos; pos < endpos; pos++)
+        for (unsigned int pos = startPosition; pos < endPosition; pos++)
         {
             if (para->getParH(level)->typeOfGridNode[pos] == GEO_FLUID)
             {
+
                 //////////////////////////////////////////////////////////////////////////
                 double x1 = para->getParH(level)->coordinateX[pos];
                 double x2 = para->getParH(level)->coordinateY[pos];
                 double x3 = para->getParH(level)->coordinateZ[pos];
                 //////////////////////////////////////////////////////////////////////////
                 number1 = pos;
-                dn1 = pos - startpos;
+                dn1 = pos - startPosition;
                 neighborsAreFluid = true;
                 //////////////////////////////////////////////////////////////////////////
                 nodes[dn1] = (makeUbTuple((float)(x1), (float)(x2), (float)(x3)));
-                nodedata[0][dn1] = (double)para->getParH(level)->pressure[pos] / (double)3.0 * (double)para->getDensityRatio() * (double)para->getVelocityRatio() * (double)para->getVelocityRatio();
-                nodedata[1][dn1] = (double)para->getParH(level)->rho[pos] / (double)3.0 * (double)para->getDensityRatio() * (double)para->getVelocityRatio() * (double)para->getVelocityRatio();
-                nodedata[2][dn1] = (double)para->getParH(level)->velocityX[pos] * (double)para->getVelocityRatio();
-                nodedata[3][dn1] = (double)para->getParH(level)->velocityY[pos] * (double)para->getVelocityRatio();
-                nodedata[4][dn1] = (double)para->getParH(level)->velocityZ[pos] * (double)para->getVelocityRatio();
-                nodedata[5][dn1] = (double)para->getParH(level)->typeOfGridNode[pos];
+                nodeData[0][dn1] = (double)para->getParH(level)->pressure[pos] / (double)3.0 * (double)para->getDensityRatio() * (double)para->getVelocityRatio() * (double)para->getVelocityRatio();
+                nodeData[1][dn1] = (double)para->getParH(level)->rho[pos] / (double)3.0 * (double)para->getDensityRatio() * (double)para->getVelocityRatio() * (double)para->getVelocityRatio();
+                nodeData[2][dn1] = (double)para->getParH(level)->velocityX[pos] * (double)para->getVelocityRatio();
+                nodeData[3][dn1] = (double)para->getParH(level)->velocityY[pos] * (double)para->getVelocityRatio();
+                nodeData[4][dn1] = (double)para->getParH(level)->velocityZ[pos] * (double)para->getVelocityRatio();
+                nodeData[5][dn1] = (double)para->getParH(level)->typeOfGridNode[pos];
+
+                if(para->getDiffOn())
+                    nodeData[firstConcNode][dn1] = (double)para->getParH(level)->concentration[pos];
 
                 if(para->getIsBodyForce())
                 {
-                    nodedata[firstBodyForceNode    ][dn1] = (double)para->getParH(level)->forceX_SP[pos] * (double)para->getScaledForceRatio(level);
-                    nodedata[firstBodyForceNode + 1][dn1] = (double)para->getParH(level)->forceY_SP[pos] * (double)para->getScaledForceRatio(level);
-                    nodedata[firstBodyForceNode + 2][dn1] = (double)para->getParH(level)->forceZ_SP[pos] * (double)para->getScaledForceRatio(level);
+                    nodeData[firstBodyForceNode    ][dn1] = (double)para->getParH(level)->forceX_SP[pos] * (double)para->getScaledForceRatio(level);
+                    nodeData[firstBodyForceNode + 1][dn1] = (double)para->getParH(level)->forceY_SP[pos] * (double)para->getScaledForceRatio(level);
+                    nodeData[firstBodyForceNode + 2][dn1] = (double)para->getParH(level)->forceZ_SP[pos] * (double)para->getScaledForceRatio(level);
                 }
 
                 if(para->getUseTurbulentViscosity())
                 {
-                    nodedata[firstNutNode][dn1] = (double)para->getParH(level)->turbViscosity[pos] * (double)para->getScaledViscosityRatio(level);
+                    nodeData[firstNutNode][dn1] = (double)para->getParH(level)->turbViscosity[pos] * (double)para->getScaledViscosityRatio(level);
                 }
 
                 if (para->getCalcTurbulenceIntensity()) {
-                    nodedata[firstTurbNode    ][dn1] = (double)para->getParH(level)->vxx[pos];
-                    nodedata[firstTurbNode + 1][dn1] = (double)para->getParH(level)->vyy[pos];
-                    nodedata[firstTurbNode + 2][dn1] = (double)para->getParH(level)->vzz[pos];
-                    nodedata[firstTurbNode + 3][dn1] = (double)para->getParH(level)->vxy[pos];
-                    nodedata[firstTurbNode + 4][dn1] = (double)para->getParH(level)->vxz[pos];
-                    nodedata[firstTurbNode + 5][dn1] = (double)para->getParH(level)->vyz[pos];
+                    nodeData[firstTurbulenceNode    ][dn1] = (double)para->getParH(level)->vxx[pos];
+                    nodeData[firstTurbulenceNode + 1][dn1] = (double)para->getParH(level)->vyy[pos];
+                    nodeData[firstTurbulenceNode + 2][dn1] = (double)para->getParH(level)->vzz[pos];
+                    nodeData[firstTurbulenceNode + 3][dn1] = (double)para->getParH(level)->vxy[pos];
+                    nodeData[firstTurbulenceNode + 4][dn1] = (double)para->getParH(level)->vxz[pos];
+                    nodeData[firstTurbulenceNode + 5][dn1] = (double)para->getParH(level)->vyz[pos];
                 }
 
                 //////////////////////////////////////////////////////////////////////////
@@ -291,21 +296,21 @@ void FileWriter::writeUnstrucuredGridLT(std::shared_ptr<Parameter> para, int lev
                     para->getParH(level)->typeOfGridNode[number7] != GEO_FLUID ||
                     para->getParH(level)->typeOfGridNode[number8] != GEO_FLUID)  neighborsAreFluid = false;
                 //////////////////////////////////////////////////////////////////////////
-                if (number2 > endpos ||
-                    number3 > endpos ||
-                    number4 > endpos ||
-                    number5 > endpos ||
-                    number6 > endpos ||
-                    number7 > endpos ||
-                    number8 > endpos)  neighborsAreFluid = false;
-                //////////////////////////////////////////////////////////////////////////
-                dn2 = number2 - startpos;
-                dn3 = number3 - startpos;
-                dn4 = number4 - startpos;
-                dn5 = number5 - startpos;
-                dn6 = number6 - startpos;
-                dn7 = number7 - startpos;
-                dn8 = number8 - startpos;
+                if (number2 > endPosition ||
+                    number3 > endPosition ||
+                    number4 > endPosition ||
+                    number5 > endPosition ||
+                    number6 > endPosition ||
+                    number7 > endPosition ||
+                    number8 > endPosition)  neighborsAreFluid = false;
+                //////////////////////////////////////////////////////////////////////////
+                dn2 = number2 - startPosition;
+                dn3 = number3 - startPosition;
+                dn4 = number4 - startPosition;
+                dn5 = number5 - startPosition;
+                dn6 = number6 - startPosition;
+                dn7 = number7 - startPosition;
+                dn8 = number8 - startPosition;
                 //////////////////////////////////////////////////////////////////////////
                 if (isPeriodicCell(para, level, number2, number1, number3, number5))
                     continue;
@@ -314,164 +319,53 @@ void FileWriter::writeUnstrucuredGridLT(std::shared_ptr<Parameter> para, int lev
                     cells.push_back(makeUbTuple(dn1, dn2, dn3, dn4, dn5, dn6, dn7, dn8));
             }
         }
-        WbWriterVtkXmlBinary::getInstance()->writeOctsWithNodeData(fname[part], nodes, cells, nodedatanames, nodedata);
+        outFNames.push_back( WbWriterVtkXmlBinary::getInstance()->writeOctsWithNodeData(fname[part], nodes, cells, nodeDataNames, nodeData) );
     }
+    return outFNames;
 }
 
-void FileWriter::writeUnstrucuredGridLTConc(std::shared_ptr<Parameter> para, int level, std::vector<std::string >& fname)
+std::vector<std::string> FileWriter::writeUnstructuredGridMedianLT(std::shared_ptr<Parameter> para, int level, std::vector<std::string >& fname)
 {
-    std::vector< UbTupleFloat3 > nodes;
-    std::vector< UbTupleUInt8 > cells;
-    std::vector< std::string > nodedatanames;
-    nodedatanames.push_back("press");
-    nodedatanames.push_back("rho");
-    nodedatanames.push_back("vx1");
-    nodedatanames.push_back("vx2");
-    nodedatanames.push_back("vx3");
-    nodedatanames.push_back("geo");
-    nodedatanames.push_back("conc");
-    unsigned int number1, number2, number3, number4, number5, number6, number7, number8;
-    uint dn1, dn2, dn3, dn4, dn5, dn6, dn7, dn8;
-    bool neighborsAreFluid;
-    unsigned int startpos = 0;
-    unsigned int endpos = 0;
-    unsigned int sizeOfNodes = 0;
-    std::vector< std::vector< double > > nodedata(nodedatanames.size());
+    std::vector< std::string > outFNames;
 
-    for (unsigned int part = 0; part < fname.size(); part++)
-    {
-        if (((part + 1) * para->getlimitOfNodesForVTK()) > (uint)para->getParH(level)->numberOfNodes)
-            sizeOfNodes = (uint)para->getParH(level)->numberOfNodes - (part * para->getlimitOfNodesForVTK());
-        else
-            sizeOfNodes = para->getlimitOfNodesForVTK();
-
-        //////////////////////////////////////////////////////////////////////////
-        startpos = part * para->getlimitOfNodesForVTK();
-        endpos = startpos + sizeOfNodes;
-        //////////////////////////////////////////////////////////////////////////
-        cells.clear();
-        nodes.resize(sizeOfNodes);
-        nodedata[0].resize(sizeOfNodes);
-        nodedata[1].resize(sizeOfNodes);
-        nodedata[2].resize(sizeOfNodes);
-        nodedata[3].resize(sizeOfNodes);
-        nodedata[4].resize(sizeOfNodes);
-        nodedata[5].resize(sizeOfNodes);
-        nodedata[6].resize(sizeOfNodes);
-        //////////////////////////////////////////////////////////////////////////
-        for (unsigned int pos = startpos; pos < endpos; pos++)
-        {
-            if (para->getParH(level)->typeOfGridNode[pos] == GEO_FLUID)
-            {
-                //////////////////////////////////////////////////////////////////////////
-                double x1 = para->getParH(level)->coordinateX[pos];
-                double x2 = para->getParH(level)->coordinateY[pos];
-                double x3 = para->getParH(level)->coordinateZ[pos];
-                //////////////////////////////////////////////////////////////////////////
-                number1 = pos;
-                dn1 = pos - startpos;
-                neighborsAreFluid = true;
-                //////////////////////////////////////////////////////////////////////////
-                nodes[dn1] = (makeUbTuple((float)(x1), (float)(x2), (float)(x3)));
-                nodedata[0][dn1] = (double)para->getParH(level)->pressure[pos] / (double)3.0 * (double)para->getDensityRatio() * (double)para->getVelocityRatio() * (double)para->getVelocityRatio();
-                nodedata[1][dn1] = (double)para->getParH(level)->rho[pos] / (double)3.0 * (double)para->getDensityRatio() * (double)para->getVelocityRatio() * (double)para->getVelocityRatio();
-                nodedata[2][dn1] = (double)para->getParH(level)->velocityX[pos] * (double)para->getVelocityRatio();
-                nodedata[3][dn1] = (double)para->getParH(level)->velocityY[pos] * (double)para->getVelocityRatio();
-                nodedata[4][dn1] = (double)para->getParH(level)->velocityZ[pos] * (double)para->getVelocityRatio();
-                nodedata[5][dn1] = (double)para->getParH(level)->typeOfGridNode[pos];
-                nodedata[6][dn1] = (double)para->getParH(level)->concentration[pos];
-                //////////////////////////////////////////////////////////////////////////
-                number2 = para->getParH(level)->neighborX[number1];
-                number3 = para->getParH(level)->neighborY[number2];
-                number4 = para->getParH(level)->neighborY[number1];
-                number5 = para->getParH(level)->neighborZ[number1];
-                number6 = para->getParH(level)->neighborZ[number2];
-                number7 = para->getParH(level)->neighborZ[number3];
-                number8 = para->getParH(level)->neighborZ[number4];
-                //////////////////////////////////////////////////////////////////////////
-                if (para->getParH(level)->typeOfGridNode[number2] != GEO_FLUID ||
-                    para->getParH(level)->typeOfGridNode[number3] != GEO_FLUID ||
-                    para->getParH(level)->typeOfGridNode[number4] != GEO_FLUID ||
-                    para->getParH(level)->typeOfGridNode[number5] != GEO_FLUID ||
-                    para->getParH(level)->typeOfGridNode[number6] != GEO_FLUID ||
-                    para->getParH(level)->typeOfGridNode[number7] != GEO_FLUID ||
-                    para->getParH(level)->typeOfGridNode[number8] != GEO_FLUID)  neighborsAreFluid = false;
-                //////////////////////////////////////////////////////////////////////////
-                if (number2 > endpos ||
-                    number3 > endpos ||
-                    number4 > endpos ||
-                    number5 > endpos ||
-                    number6 > endpos ||
-                    number7 > endpos ||
-                    number8 > endpos)  neighborsAreFluid = false;
-                //////////////////////////////////////////////////////////////////////////
-                dn2 = number2 - startpos;
-                dn3 = number3 - startpos;
-                dn4 = number4 - startpos;
-                dn5 = number5 - startpos;
-                dn6 = number6 - startpos;
-                dn7 = number7 - startpos;
-                dn8 = number8 - startpos;
-                //////////////////////////////////////////////////////////////////////////
-                if (isPeriodicCell(para, level, number2, number1, number3, number5))
-                    continue;
-                //////////////////////////////////////////////////////////////////////////
-                if (neighborsAreFluid)
-                    cells.push_back(makeUbTuple(dn1, dn2, dn3, dn4, dn5, dn6, dn7, dn8));
-            }
-        }
-        WbWriterVtkXmlBinary::getInstance()->writeOctsWithNodeData(fname[part], nodes, cells, nodedatanames, nodedata);
-    }
-}
-
-void FileWriter::writeUnstrucuredGridMedianLT(std::shared_ptr<Parameter> para, int level, std::vector<std::string >& fname)
-{
     std::vector< UbTupleFloat3 > nodes;
     std::vector< UbTupleUInt8 > cells;
     //std::vector< UbTupleUInt8 > cells2;
-    std::vector< std::string > nodedatanames;
-    nodedatanames.push_back("pressMed");
-    nodedatanames.push_back("rhoMed");
-    nodedatanames.push_back("vx1Med");
-    nodedatanames.push_back("vx2Med");
-    nodedatanames.push_back("vx3Med");
-    nodedatanames.push_back("geo");
+    std::vector< std::string > nodeDataNames = getMedianNodeDataNames(para);
+    int startIndex = para->getDiffOn()? 1 : 0;
+
     unsigned int number1, number2, number3, number4, number5, number6, number7, number8;
     unsigned int dn1, dn2, dn3, dn4, dn5, dn6, dn7, dn8;
     bool neighborsFluid;
-    unsigned int startpos = 0;
-    unsigned int endpos = 0;
-    unsigned int sizeOfNodes = 0;
-    std::vector< std::vector< double > > nodedata(nodedatanames.size());
+    unsigned int startPosition;
+    unsigned int endPosition;
+    unsigned int sizeOfNodes;
+    std::vector< std::vector< double > > nodeData(nodeDataNames.size());
 
     //printf("\n test for if... \n");
     for (unsigned int part = 0; part < fname.size(); part++)
     {
         //printf("\n test in if I... \n");
         //////////////////////////////////////////////////////////////////////////
-        if (((part + 1) * para->getlimitOfNodesForVTK()) > (uint)para->getParH(level)->numberOfNodes)
+        if (((part + 1) * para->getLimitOfNodesForVTK()) > (uint)para->getParH(level)->numberOfNodes)
         {
-            sizeOfNodes = (uint)para->getParH(level)->numberOfNodes - (part * para->getlimitOfNodesForVTK());
+            sizeOfNodes = (uint)para->getParH(level)->numberOfNodes - (part * para->getLimitOfNodesForVTK());
         }
         else
         {
-            sizeOfNodes = para->getlimitOfNodesForVTK();
+            sizeOfNodes = para->getLimitOfNodesForVTK();
         }
         //////////////////////////////////////////////////////////////////////////
-        startpos = part * para->getlimitOfNodesForVTK();
-        endpos = startpos + sizeOfNodes;
+        startPosition = part * para->getLimitOfNodesForVTK();
+        endPosition = startPosition + sizeOfNodes;
         //////////////////////////////////////////////////////////////////////////
         cells.clear();
         nodes.resize(sizeOfNodes);
-        nodedata[0].resize(sizeOfNodes);
-        nodedata[1].resize(sizeOfNodes);
-        nodedata[2].resize(sizeOfNodes);
-        nodedata[3].resize(sizeOfNodes);
-        nodedata[4].resize(sizeOfNodes);
-        nodedata[5].resize(sizeOfNodes);
+        for (size_t i = 0; i < nodeDataNames.size(); i++)
+            nodeData[i].resize(sizeOfNodes);
         //////////////////////////////////////////////////////////////////////////
         //printf("\n test in if II... \n");
-        for (unsigned int pos = startpos; pos < endpos; pos++)
+        for (unsigned int pos = startPosition; pos < endPosition; pos++)
         {
             if (para->getParH(level)->typeOfGridNode[pos] == GEO_FLUID)
             {
@@ -481,16 +375,18 @@ void FileWriter::writeUnstrucuredGridMedianLT(std::shared_ptr<Parameter> para, i
                 double x3 = para->getParH(level)->coordinateZ[pos];
                 //////////////////////////////////////////////////////////////////////////
                 number1 = pos;
-                dn1 = pos - startpos;
+                dn1 = pos - startPosition;
                 neighborsFluid = true;
                 //////////////////////////////////////////////////////////////////////////
                 nodes[dn1] = (makeUbTuple((float)(x1), (float)(x2), (float)(x3)));
-                nodedata[0][dn1] = para->getParH(level)->press_SP_Med_Out[pos] / 3.0f * para->getDensityRatio() * para->getVelocityRatio() * para->getVelocityRatio();
-                nodedata[1][dn1] = para->getParH(level)->rho_SP_Med_Out[pos] / 3.0f * para->getDensityRatio() * para->getVelocityRatio() * para->getVelocityRatio();
-                nodedata[2][dn1] = para->getParH(level)->vx_SP_Med_Out[pos] * para->getVelocityRatio();
-                nodedata[3][dn1] = para->getParH(level)->vy_SP_Med_Out[pos] * para->getVelocityRatio();
-                nodedata[4][dn1] = para->getParH(level)->vz_SP_Med_Out[pos] * para->getVelocityRatio();
-                nodedata[5][dn1] = (double)para->getParH(level)->typeOfGridNode[pos];
+                if(para->getDiffOn())
+                    nodeData[0][dn1] = (double)para->getParH(level)->Conc_Med_Out[pos];
+                nodeData[startIndex    ][dn1] = para->getParH(level)->press_SP_Med_Out[pos] / 3.0f * para->getDensityRatio() * para->getVelocityRatio() * para->getVelocityRatio();
+                nodeData[startIndex + 1][dn1] = para->getParH(level)->rho_SP_Med_Out[pos] / 3.0f * para->getDensityRatio() * para->getVelocityRatio() * para->getVelocityRatio();
+                nodeData[startIndex + 2][dn1] = para->getParH(level)->vx_SP_Med_Out[pos] * para->getVelocityRatio();
+                nodeData[startIndex + 3][dn1] = para->getParH(level)->vy_SP_Med_Out[pos] * para->getVelocityRatio();
+                nodeData[startIndex + 4][dn1] = para->getParH(level)->vz_SP_Med_Out[pos] * para->getVelocityRatio();
+                nodeData[startIndex + 5][dn1] = (double)para->getParH(level)->typeOfGridNode[pos];
                 //////////////////////////////////////////////////////////////////////////
                 number2 = para->getParH(level)->neighborX[number1];
                 number3 = para->getParH(level)->neighborY[number2];
@@ -508,21 +404,21 @@ void FileWriter::writeUnstrucuredGridMedianLT(std::shared_ptr<Parameter> para, i
                     para->getParH(level)->typeOfGridNode[number7] != GEO_FLUID ||
                     para->getParH(level)->typeOfGridNode[number8] != GEO_FLUID)  neighborsFluid = false;
                 //////////////////////////////////////////////////////////////////////////
-                if (number2 > endpos ||
-                    number3 > endpos ||
-                    number4 > endpos ||
-                    number5 > endpos ||
-                    number6 > endpos ||
-                    number7 > endpos ||
-                    number8 > endpos)  neighborsFluid = false;
-                //////////////////////////////////////////////////////////////////////////
-                dn2 = number2 - startpos;
-                dn3 = number3 - startpos;
-                dn4 = number4 - startpos;
-                dn5 = number5 - startpos;
-                dn6 = number6 - startpos;
-                dn7 = number7 - startpos;
-                dn8 = number8 - startpos;
+                if (number2 > endPosition ||
+                    number3 > endPosition ||
+                    number4 > endPosition ||
+                    number5 > endPosition ||
+                    number6 > endPosition ||
+                    number7 > endPosition ||
+                    number8 > endPosition)  neighborsFluid = false;
+                //////////////////////////////////////////////////////////////////////////
+                dn2 = number2 - startPosition;
+                dn3 = number3 - startPosition;
+                dn4 = number4 - startPosition;
+                dn5 = number5 - startPosition;
+                dn6 = number6 - startPosition;
+                dn7 = number7 - startPosition;
+                dn8 = number8 - startPosition;
                 //////////////////////////////////////////////////////////////////////////
                 if (isPeriodicCell(para, level, number2, number1, number3, number5))
                     continue;
@@ -531,122 +427,8 @@ void FileWriter::writeUnstrucuredGridMedianLT(std::shared_ptr<Parameter> para, i
                 //////////////////////////////////////////////////////////////////////////
             }
         }
-        WbWriterVtkXmlBinary::getInstance()->writeOctsWithNodeData(fname[part], nodes, cells, nodedatanames, nodedata);
+        outFNames.push_back(WbWriterVtkXmlBinary::getInstance()->writeOctsWithNodeData(fname[part], nodes, cells, nodeDataNames, nodeData));
         //////////////////////////////////////////////////////////////////////////
     }
-}
-
-void FileWriter::writeUnstrucuredGridMedianLTConc(std::shared_ptr<Parameter> para, int level, std::vector<std::string >& fname)
-{
-    std::vector< UbTupleFloat3 > nodes;
-    std::vector< UbTupleUInt8 > cells;
-    std::vector< std::string > nodedatanames;
-    nodedatanames.push_back("concMed");
-    nodedatanames.push_back("pressMed");
-    nodedatanames.push_back("rhoMed");
-    nodedatanames.push_back("vx1Med");
-    nodedatanames.push_back("vx2Med");
-    nodedatanames.push_back("vx3Med");
-    nodedatanames.push_back("geo");
-    uint number1, number2, number3, number4, number5, number6, number7, number8;
-    uint dn1, dn2, dn3, dn4, dn5, dn6, dn7, dn8;
-    bool neighborsFluid;
-    uint startpos = 0;
-    uint endpos = 0;
-    uint sizeOfNodes = 0;
-    std::vector< std::vector< double > > nodedata(nodedatanames.size());
-
-    for (unsigned int part = 0; part < fname.size(); part++)
-    {
-        if (((part + 1) * para->getlimitOfNodesForVTK()) > (uint)para->getParH(level)->numberOfNodes)
-            sizeOfNodes = (uint)para->getParH(level)->numberOfNodes - (part * para->getlimitOfNodesForVTK());
-        else
-            sizeOfNodes = para->getlimitOfNodesForVTK();
-        //////////////////////////////////////////////////////////////////////////
-        startpos = part * para->getlimitOfNodesForVTK();
-        endpos = startpos + sizeOfNodes;
-        //////////////////////////////////////////////////////////////////////////
-        cells.clear();
-        nodes.resize(sizeOfNodes);
-        nodedata[0].resize(sizeOfNodes);
-        nodedata[1].resize(sizeOfNodes);
-        nodedata[2].resize(sizeOfNodes);
-        nodedata[3].resize(sizeOfNodes);
-        nodedata[4].resize(sizeOfNodes);
-        nodedata[5].resize(sizeOfNodes);
-        nodedata[6].resize(sizeOfNodes);
-        //////////////////////////////////////////////////////////////////////////
-        for (unsigned int pos = startpos; pos < endpos; pos++)
-        {
-            if (para->getParH(level)->typeOfGridNode[pos] == GEO_FLUID)
-            {
-                //////////////////////////////////////////////////////////////////////////
-                double x1 = para->getParH(level)->coordinateX[pos];
-                double x2 = para->getParH(level)->coordinateY[pos];
-                double x3 = para->getParH(level)->coordinateZ[pos];
-                //////////////////////////////////////////////////////////////////////////
-                number1 = pos;
-                dn1 = pos - startpos;
-                neighborsFluid = true;
-                //////////////////////////////////////////////////////////////////////////
-                nodes[dn1] = (makeUbTuple((float)(x1), (float)(x2), (float)(x3)));
-                nodedata[0][dn1] = (double)para->getParH(level)->Conc_Med_Out[pos];
-                nodedata[1][dn1] = (double)para->getParH(level)->press_SP_Med_Out[pos] / 3.0f * para->getDensityRatio() * para->getVelocityRatio() * para->getVelocityRatio();
-                nodedata[2][dn1] = (double)para->getParH(level)->rho_SP_Med_Out[pos] / 3.0f * para->getDensityRatio() * para->getVelocityRatio() * para->getVelocityRatio();
-                nodedata[3][dn1] = (double)para->getParH(level)->vx_SP_Med_Out[pos] * para->getVelocityRatio();
-                nodedata[4][dn1] = (double)para->getParH(level)->vy_SP_Med_Out[pos] * para->getVelocityRatio();
-                nodedata[5][dn1] = (double)para->getParH(level)->vz_SP_Med_Out[pos] * para->getVelocityRatio();
-                nodedata[6][dn1] = (double)para->getParH(level)->typeOfGridNode[pos];
-                //////////////////////////////////////////////////////////////////////////
-                number2 = para->getParH(level)->neighborX[number1];
-                number3 = para->getParH(level)->neighborY[number2];
-                number4 = para->getParH(level)->neighborY[number1];
-                number5 = para->getParH(level)->neighborZ[number1];
-                number6 = para->getParH(level)->neighborZ[number2];
-                number7 = para->getParH(level)->neighborZ[number3];
-                number8 = para->getParH(level)->neighborZ[number4];
-                //////////////////////////////////////////////////////////////////////////
-                if (para->getParH(level)->typeOfGridNode[number2] != GEO_FLUID ||
-                    para->getParH(level)->typeOfGridNode[number3] != GEO_FLUID ||
-                    para->getParH(level)->typeOfGridNode[number4] != GEO_FLUID ||
-                    para->getParH(level)->typeOfGridNode[number5] != GEO_FLUID ||
-                    para->getParH(level)->typeOfGridNode[number6] != GEO_FLUID ||
-                    para->getParH(level)->typeOfGridNode[number7] != GEO_FLUID ||
-                    para->getParH(level)->typeOfGridNode[number8] != GEO_FLUID)  neighborsFluid = false;
-                //////////////////////////////////////////////////////////////////////////
-                if (number2 > endpos ||
-                    number3 > endpos ||
-                    number4 > endpos ||
-                    number5 > endpos ||
-                    number6 > endpos ||
-                    number7 > endpos ||
-                    number8 > endpos)  neighborsFluid = false;
-                //////////////////////////////////////////////////////////////////////////
-                dn2 = number2 - startpos;
-                dn3 = number3 - startpos;
-                dn4 = number4 - startpos;
-                dn5 = number5 - startpos;
-                dn6 = number6 - startpos;
-                dn7 = number7 - startpos;
-                dn8 = number8 - startpos;
-                //////////////////////////////////////////////////////////////////////////
-                if (isPeriodicCell(para, level, number2, number1, number3, number5))
-                    continue;
-                //////////////////////////////////////////////////////////////////////////
-                if (neighborsFluid)
-                    cells.push_back(makeUbTuple(dn1, dn2, dn3, dn4, dn5, dn6, dn7, dn8));
-                //////////////////////////////////////////////////////////////////////////
-            }
-        }
-        WbWriterVtkXmlBinary::getInstance()->writeOctsWithNodeData(fname[part], nodes, cells, nodedatanames, nodedata);
-        //////////////////////////////////////////////////////////////////////////
-    }
-}
-//////////////////////////////////////////////////////////////////////////
-
-
-
-
-
-
-
+    return outFNames;
+}
\ No newline at end of file
diff --git a/src/gpu/VirtualFluids_GPU/Output/FileWriter.h b/src/gpu/VirtualFluids_GPU/Output/FileWriter.h
index 2d7c176fa067ea86573849e90bd91dab259526b4..0a2aec490af0f9ef1f75fe4f64cfcb219e6167d5 100644
--- a/src/gpu/VirtualFluids_GPU/Output/FileWriter.h
+++ b/src/gpu/VirtualFluids_GPU/Output/FileWriter.h
@@ -16,22 +16,22 @@ struct PN27;
 class FileWriter : public DataWriter
 {
 public:
-	void writeInit(std::shared_ptr<Parameter> para, std::shared_ptr<CudaMemoryManager> cudaMemoryManager) override;
-	void writeTimestep(std::shared_ptr<Parameter> para, unsigned int timestep) override;
+    void writeInit(std::shared_ptr<Parameter> para, std::shared_ptr<CudaMemoryManager> cudaMemoryManager) override;
+    void writeTimestep(std::shared_ptr<Parameter> para, unsigned int timestep) override;
 
 private:
-	void writeTimestep(std::shared_ptr<Parameter> para, unsigned int timestep, int level) override;
-	//void writeParticle(Parameter* para, unsigned int t);
-    void writeUnstrucuredGridLT(std::shared_ptr<Parameter> para, int level,
+    void writeTimestep(std::shared_ptr<Parameter> para, unsigned int timestep, int level) override;
+    std::vector<std::string> writeUnstructuredGridLT(std::shared_ptr<Parameter> para, int level,
                                                          std::vector<std::string> &fname);
-	void writeUnstrucuredGridLTConc(std::shared_ptr<Parameter> para, int level, std::vector<std::string >& fname);
-	void writeUnstrucuredGridMedianLT(std::shared_ptr<Parameter> para, int level, std::vector<std::string >& fname);
-	void writeUnstrucuredGridMedianLTConc(std::shared_ptr<Parameter> para, int level, std::vector<std::string >& fname);
-	bool isPeriodicCell(std::shared_ptr<Parameter> para, int level, unsigned int number2, unsigned int number1, unsigned int number3, unsigned int number5);
+    std::vector<std::string> writeUnstructuredGridMedianLT(std::shared_ptr<Parameter> para, int level, std::vector<std::string >& fname);
+    bool isPeriodicCell(std::shared_ptr<Parameter> para, int level, unsigned int number2, unsigned int number1, unsigned int number3, unsigned int number5);
 
-    void writeCollectionFile( std::shared_ptr<Parameter> para, unsigned int timestep );
+    std::string writeCollectionFile( std::shared_ptr<Parameter> para, unsigned int timestep );
 
-    void writeCollectionFileMedian( std::shared_ptr<Parameter> para, unsigned int timestep );
+    std::string writeCollectionFileMedian( std::shared_ptr<Parameter> para, unsigned int timestep );
+
+    std::vector<std::string> getNodeDataNames(std::shared_ptr<Parameter> para);
+    std::vector<std::string> getMedianNodeDataNames(std::shared_ptr<Parameter> para);
 
     std::vector< std::string > fileNamesForCollectionFile;
     std::vector< std::string > fileNamesForCollectionFileMedian;
diff --git a/src/gpu/VirtualFluids_GPU/Output/UnstructuredGridWriter.hpp b/src/gpu/VirtualFluids_GPU/Output/UnstructuredGridWriter.hpp
index cafe70205455ae8592c1efe86e4ba9de8e1ba170..5d5548ba7e64a23152126446e3c438da1c7e7653 100644
--- a/src/gpu/VirtualFluids_GPU/Output/UnstructuredGridWriter.hpp
+++ b/src/gpu/VirtualFluids_GPU/Output/UnstructuredGridWriter.hpp
@@ -197,16 +197,16 @@ namespace UnstructuredGridWriter
 			vxmax = 0;
 			//printf("\n test in if I... \n");
 			//////////////////////////////////////////////////////////////////////////
-			if ( ((part+1)*para->getlimitOfNodesForVTK()) > (uint)para->getParH(level)->numberOfNodes)
+			if ( ((part+1)*para->getLimitOfNodesForVTK()) > (uint)para->getParH(level)->numberOfNodes)
 			{
-                sizeOfNodes = (uint)para->getParH(level)->numberOfNodes - (part * para->getlimitOfNodesForVTK());
+                sizeOfNodes = (uint)para->getParH(level)->numberOfNodes - (part * para->getLimitOfNodesForVTK());
 			}
 			else
 			{
-				sizeOfNodes = para->getlimitOfNodesForVTK();
+				sizeOfNodes = para->getLimitOfNodesForVTK();
 			}
 			//////////////////////////////////////////////////////////////////////////
-			startpos = part * para->getlimitOfNodesForVTK();
+			startpos = part * para->getLimitOfNodesForVTK();
 			endpos = startpos + sizeOfNodes;
 			//////////////////////////////////////////////////////////////////////////
 			cells.clear();
@@ -340,16 +340,16 @@ namespace UnstructuredGridWriter
 			vxmax = 0;
 			//printf("\n test in if I... \n");
 			//////////////////////////////////////////////////////////////////////////
-            if (((part + 1) * para->getlimitOfNodesForVTK()) > (uint)para->getParH(level)->numberOfNodes)
+            if (((part + 1) * para->getLimitOfNodesForVTK()) > (uint)para->getParH(level)->numberOfNodes)
 			{
-                sizeOfNodes = (uint)para->getParH(level)->numberOfNodes - (part * para->getlimitOfNodesForVTK());
+                sizeOfNodes = (uint)para->getParH(level)->numberOfNodes - (part * para->getLimitOfNodesForVTK());
 			}
 			else
 			{
-				sizeOfNodes = para->getlimitOfNodesForVTK();
+				sizeOfNodes = para->getLimitOfNodesForVTK();
 			}
 			//////////////////////////////////////////////////////////////////////////
-			startpos = part * para->getlimitOfNodesForVTK();
+			startpos = part * para->getLimitOfNodesForVTK();
 			endpos = startpos + sizeOfNodes;
 			//////////////////////////////////////////////////////////////////////////
 			cells.clear();
@@ -479,16 +479,16 @@ namespace UnstructuredGridWriter
 			vxmax = 0;
 			//printf("\n test in if I... \n");
 			//////////////////////////////////////////////////////////////////////////
-            if (((part + 1) * para->getlimitOfNodesForVTK()) > (uint)para->getParH(level)->numberOfNodes)
+            if (((part + 1) * para->getLimitOfNodesForVTK()) > (uint)para->getParH(level)->numberOfNodes)
 			{
-                sizeOfNodes = (uint)para->getParH(level)->numberOfNodes - (part * para->getlimitOfNodesForVTK());
+                sizeOfNodes = (uint)para->getParH(level)->numberOfNodes - (part * para->getLimitOfNodesForVTK());
 			}
 			else
 			{
-				sizeOfNodes = para->getlimitOfNodesForVTK();
+				sizeOfNodes = para->getLimitOfNodesForVTK();
 			}
 			//////////////////////////////////////////////////////////////////////////
-			startpos = part * para->getlimitOfNodesForVTK();
+			startpos = part * para->getLimitOfNodesForVTK();
 			endpos = startpos + sizeOfNodes;
 			//////////////////////////////////////////////////////////////////////////
 			cells.clear();
@@ -628,16 +628,16 @@ namespace UnstructuredGridWriter
 			vxmax = 0;
 			//printf("\n test in if I... \n");
 			//////////////////////////////////////////////////////////////////////////
-            if (((part + 1) * para->getlimitOfNodesForVTK()) > (uint)para->getParH(level)->numberOfNodes)
+            if (((part + 1) * para->getLimitOfNodesForVTK()) > (uint)para->getParH(level)->numberOfNodes)
 			{
-                sizeOfNodes = (uint)para->getParH(level)->numberOfNodes - (part * para->getlimitOfNodesForVTK());
+                sizeOfNodes = (uint)para->getParH(level)->numberOfNodes - (part * para->getLimitOfNodesForVTK());
 			}
 			else
 			{
-				sizeOfNodes = para->getlimitOfNodesForVTK();
+				sizeOfNodes = para->getLimitOfNodesForVTK();
 			}
 			//////////////////////////////////////////////////////////////////////////
-			startpos = part * para->getlimitOfNodesForVTK();
+			startpos = part * para->getLimitOfNodesForVTK();
 			endpos = startpos + sizeOfNodes;
 			//////////////////////////////////////////////////////////////////////////
 			cells.clear();
@@ -771,16 +771,16 @@ namespace UnstructuredGridWriter
 			vxmax = 0;
 			//printf("\n test in if I... \n");
 			//////////////////////////////////////////////////////////////////////////
-            if (((part + 1) * para->getlimitOfNodesForVTK()) > (uint)para->getParH(level)->numberOfNodes)
+            if (((part + 1) * para->getLimitOfNodesForVTK()) > (uint)para->getParH(level)->numberOfNodes)
 			{
-                sizeOfNodes = (uint)para->getParH(level)->numberOfNodes - (part * para->getlimitOfNodesForVTK());
+                sizeOfNodes = (uint)para->getParH(level)->numberOfNodes - (part * para->getLimitOfNodesForVTK());
 			}
 			else
 			{
-				sizeOfNodes = para->getlimitOfNodesForVTK();
+				sizeOfNodes = para->getLimitOfNodesForVTK();
 			}
 			//////////////////////////////////////////////////////////////////////////
-			startpos = part * para->getlimitOfNodesForVTK();
+			startpos = part * para->getLimitOfNodesForVTK();
 			endpos = startpos + sizeOfNodes;
 			//////////////////////////////////////////////////////////////////////////
 			cells.clear();
@@ -1342,16 +1342,16 @@ namespace UnstructuredGridWriter
 			vxmax = 0;
 			//printf("\n test in if I... \n");
 			//////////////////////////////////////////////////////////////////////////
-			if ( ((part+1)*para->getlimitOfNodesForVTK()) > (uint)para->getParH(level)->numberOfNodes)
+			if ( ((part+1)*para->getLimitOfNodesForVTK()) > (uint)para->getParH(level)->numberOfNodes)
 			{
-                sizeOfNodes = (uint)para->getParH(level)->numberOfNodes - (part * para->getlimitOfNodesForVTK());
+                sizeOfNodes = (uint)para->getParH(level)->numberOfNodes - (part * para->getLimitOfNodesForVTK());
 			}
 			else
 			{
-				sizeOfNodes = para->getlimitOfNodesForVTK();
+				sizeOfNodes = para->getLimitOfNodesForVTK();
 			}
 			//////////////////////////////////////////////////////////////////////////
-			startpos = part * para->getlimitOfNodesForVTK();
+			startpos = part * para->getLimitOfNodesForVTK();
 			endpos = startpos + sizeOfNodes;
 			//////////////////////////////////////////////////////////////////////////
 			cells.clear();
@@ -1465,16 +1465,16 @@ namespace UnstructuredGridWriter
 			vxmax = 0;
 			//printf("\n test in if I... \n");
 			//////////////////////////////////////////////////////////////////////////
-            if (((part + 1) * para->getlimitOfNodesForVTK()) > (uint)para->getParH(level)->numberOfNodes)
+            if (((part + 1) * para->getLimitOfNodesForVTK()) > (uint)para->getParH(level)->numberOfNodes)
 			{
-                sizeOfNodes = (uint)para->getParH(level)->numberOfNodes - (part * para->getlimitOfNodesForVTK());
+                sizeOfNodes = (uint)para->getParH(level)->numberOfNodes - (part * para->getLimitOfNodesForVTK());
 			}
 			else
 			{
-				sizeOfNodes = para->getlimitOfNodesForVTK();
+				sizeOfNodes = para->getLimitOfNodesForVTK();
 			}
 			//////////////////////////////////////////////////////////////////////////
-			startpos = part * para->getlimitOfNodesForVTK();
+			startpos = part * para->getLimitOfNodesForVTK();
 			endpos = startpos + sizeOfNodes;
 			//////////////////////////////////////////////////////////////////////////
 			cells.clear();
@@ -1595,16 +1595,16 @@ namespace UnstructuredGridWriter
 			vxmax = 0;
 			//printf("\n test in if I... \n");
 			//////////////////////////////////////////////////////////////////////////
-            if (((part + 1) * para->getlimitOfNodesForVTK()) > (uint)para->getParH(level)->numberOfNodes)
+            if (((part + 1) * para->getLimitOfNodesForVTK()) > (uint)para->getParH(level)->numberOfNodes)
 			{
-                sizeOfNodes = (uint)para->getParH(level)->numberOfNodes - (part * para->getlimitOfNodesForVTK());
+                sizeOfNodes = (uint)para->getParH(level)->numberOfNodes - (part * para->getLimitOfNodesForVTK());
 			}
 			else
 			{
-				sizeOfNodes = para->getlimitOfNodesForVTK();
+				sizeOfNodes = para->getLimitOfNodesForVTK();
 			}
 			//////////////////////////////////////////////////////////////////////////
-			startpos = part * para->getlimitOfNodesForVTK();
+			startpos = part * para->getLimitOfNodesForVTK();
 			endpos = startpos + sizeOfNodes;
 			//////////////////////////////////////////////////////////////////////////
 			cells.clear();
@@ -1975,16 +1975,16 @@ namespace UnstructuredGridWriter
 			vxmax = 0;
 			//printf("\n test in if I... \n");
 			//////////////////////////////////////////////////////////////////////////
-            if (((part + 1) * para->getlimitOfNodesForVTK()) > (uint)para->getParH(level)->numberOfNodes)
+            if (((part + 1) * para->getLimitOfNodesForVTK()) > (uint)para->getParH(level)->numberOfNodes)
 			{
-                sizeOfNodes = (uint)para->getParH(level)->numberOfNodes - (part * para->getlimitOfNodesForVTK());
+                sizeOfNodes = (uint)para->getParH(level)->numberOfNodes - (part * para->getLimitOfNodesForVTK());
 			}
 			else
 			{
-				sizeOfNodes = para->getlimitOfNodesForVTK();
+				sizeOfNodes = para->getLimitOfNodesForVTK();
 			}
 			//////////////////////////////////////////////////////////////////////////
-			startpos = part * para->getlimitOfNodesForVTK();
+			startpos = part * para->getLimitOfNodesForVTK();
 			endpos = startpos + sizeOfNodes;
 			//////////////////////////////////////////////////////////////////////////
 			cells.clear();
@@ -2080,16 +2080,16 @@ namespace UnstructuredGridWriter
 			vxmax = 0;
 			//printf("\n test in if I... \n");
 			//////////////////////////////////////////////////////////////////////////
-            if (((part + 1) * para->getlimitOfNodesForVTK()) > (uint)para->getParH(level)->numberOfNodes)
+            if (((part + 1) * para->getLimitOfNodesForVTK()) > (uint)para->getParH(level)->numberOfNodes)
 			{
-                sizeOfNodes = (uint)para->getParH(level)->numberOfNodes - (part * para->getlimitOfNodesForVTK());
+                sizeOfNodes = (uint)para->getParH(level)->numberOfNodes - (part * para->getLimitOfNodesForVTK());
 			}
 			else
 			{
-				sizeOfNodes = para->getlimitOfNodesForVTK();
+				sizeOfNodes = para->getLimitOfNodesForVTK();
 			}
 			//////////////////////////////////////////////////////////////////////////
-			startpos = part * para->getlimitOfNodesForVTK();
+			startpos = part * para->getLimitOfNodesForVTK();
 			endpos = startpos + sizeOfNodes;
 			//////////////////////////////////////////////////////////////////////////
 			cells.clear();
@@ -2192,16 +2192,16 @@ namespace UnstructuredGridWriter
 			vxmax = 0;
 			//printf("\n test in if I... \n");
 			//////////////////////////////////////////////////////////////////////////
-            if (((part + 1) * para->getlimitOfNodesForVTK()) > (uint)para->getParH(level)->numberOfNodes)
+            if (((part + 1) * para->getLimitOfNodesForVTK()) > (uint)para->getParH(level)->numberOfNodes)
 			{
-                sizeOfNodes = (uint)para->getParH(level)->numberOfNodes - (part * para->getlimitOfNodesForVTK());
+                sizeOfNodes = (uint)para->getParH(level)->numberOfNodes - (part * para->getLimitOfNodesForVTK());
 			}
 			else
 			{
-				sizeOfNodes = para->getlimitOfNodesForVTK();
+				sizeOfNodes = para->getLimitOfNodesForVTK();
 			}
 			//////////////////////////////////////////////////////////////////////////
-			startpos = part * para->getlimitOfNodesForVTK();
+			startpos = part * para->getLimitOfNodesForVTK();
 			endpos = startpos + sizeOfNodes;
 			//////////////////////////////////////////////////////////////////////////
 			cells.clear();
diff --git a/src/gpu/VirtualFluids_GPU/Parameter/CudaStreamManager.cpp b/src/gpu/VirtualFluids_GPU/Parameter/CudaStreamManager.cpp
index 3cc771e413134e90b0d09d8eeb6dfee791f8a1e2..c6db87138610a6c029a06e32b70f496dc2306a83 100644
--- a/src/gpu/VirtualFluids_GPU/Parameter/CudaStreamManager.cpp
+++ b/src/gpu/VirtualFluids_GPU/Parameter/CudaStreamManager.cpp
@@ -31,15 +31,14 @@
 #include <helper_cuda.h>
 #include <iostream>
 
-void CudaStreamManager::registerStream(CudaStreamIndex streamIndex)
+int CudaStreamManager::registerAndLaunchStream(CudaStreamIndex streamIndex)
 {   
-    if(streamIndex != CudaStreamIndex::Legacy)
-        cudaStreams.emplace(streamIndex, nullptr);
-}
-void CudaStreamManager::launchStreams()
-{
-    for (auto &stream : cudaStreams)
-        cudaStreamCreate(&stream.second);
+    if(streamIndex == CudaStreamIndex::Legacy) return 0;
+
+    cudaStream_t new_stream = nullptr;
+    cudaStreamCreate(&new_stream);
+    cudaStreams.emplace(streamIndex, new_stream);
+    return int(cudaStreams.count(streamIndex) - 1);
 }
 
 void CudaStreamManager::terminateStreams()
diff --git a/src/gpu/VirtualFluids_GPU/Parameter/CudaStreamManager.h b/src/gpu/VirtualFluids_GPU/Parameter/CudaStreamManager.h
index 631a945a653e6b4b60924a650e94b3873ebacc7d..98032ef16adebf56f10d8ed5e9c04bbf517876c3 100644
--- a/src/gpu/VirtualFluids_GPU/Parameter/CudaStreamManager.h
+++ b/src/gpu/VirtualFluids_GPU/Parameter/CudaStreamManager.h
@@ -1,28 +1,28 @@
 //=======================================================================================
-// ____          ____    __    ______     __________   __      __       __        __         
-// \    \       |    |  |  |  |   _   \  |___    ___| |  |    |  |     /  \      |  |        
-//  \    \      |    |  |  |  |  |_)   |     |  |     |  |    |  |    /    \     |  |        
-//   \    \     |    |  |  |  |   _   /      |  |     |  |    |  |   /  /\  \    |  |        
-//    \    \    |    |  |  |  |  | \  \      |  |     |   \__/   |  /  ____  \   |  |____    
-//     \    \   |    |  |__|  |__|  \__\     |__|      \________/  /__/    \__\  |_______|   
-//      \    \  |    |   ________________________________________________________________    
-//       \    \ |    |  |  ______________________________________________________________|   
-//        \    \|    |  |  |         __          __     __     __     ______      _______    
-//         \         |  |  |_____   |  |        |  |   |  |   |  |   |   _  \    /  _____)   
-//          \        |  |   _____|  |  |        |  |   |  |   |  |   |  | \  \   \_______    
+// ____          ____    __    ______     __________   __      __       __        __
+// \    \       |    |  |  |  |   _   \  |___    ___| |  |    |  |     /  \      |  |
+//  \    \      |    |  |  |  |  |_)   |     |  |     |  |    |  |    /    \     |  |
+//   \    \     |    |  |  |  |   _   /      |  |     |  |    |  |   /  /\  \    |  |
+//    \    \    |    |  |  |  |  | \  \      |  |     |   \__/   |  /  ____  \   |  |____
+//     \    \   |    |  |__|  |__|  \__\     |__|      \________/  /__/    \__\  |_______|
+//      \    \  |    |   ________________________________________________________________
+//       \    \ |    |  |  ______________________________________________________________|
+//        \    \|    |  |  |         __          __     __     __     ______      _______
+//         \         |  |  |_____   |  |        |  |   |  |   |  |   |   _  \    /  _____)
+//          \        |  |   _____|  |  |        |  |   |  |   |  |   |  | \  \   \_______
 //           \       |  |  |        |  |_____   |   \_/   |   |  |   |  |_/  /    _____  |
-//            \ _____|  |__|        |________|   \_______/    |__|   |______/    (_______/   
+//            \ _____|  |__|        |________|   \_______/    |__|   |______/    (_______/
 //
-//  This file is part of VirtualFluids. VirtualFluids is free software: you can 
+//  This file is part of VirtualFluids. VirtualFluids is free software: you can
 //  redistribute it and/or modify it under the terms of the GNU General Public
-//  License as published by the Free Software Foundation, either version 3 of 
+//  License as published by the Free Software Foundation, either version 3 of
 //  the License, or (at your option) any later version.
-//  
-//  VirtualFluids is distributed in the hope that it will be useful, but WITHOUT 
-//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 
-//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License 
+//
+//  VirtualFluids is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 //  for more details.
-//  
+//
 //  You should have received a copy of the GNU General Public License along
 //  with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
 //
@@ -43,27 +43,26 @@ enum class CudaStreamIndex
         Precursor,
         ActuatorFarm
     };
+
 class CudaStreamManager
-{   
+{
 private:
     std::multimap<CudaStreamIndex, cudaStream_t> cudaStreams;
     cudaEvent_t startBulkKernel = NULL;
     cudaStream_t legacyStream = CU_STREAM_LEGACY;
 
-
 public:
-    void registerStream(CudaStreamIndex streamIndex);
-    void launchStreams();
+    int registerAndLaunchStream(CudaStreamIndex streamIndex);
     void terminateStreams();
-    cudaStream_t &getStream(CudaStreamIndex streamIndex, uint multiStreamIndex=0);
+    cudaStream_t &getStream(CudaStreamIndex streamIndex, uint multiStreamIndex = 0);
 
     bool streamIsRegistered(CudaStreamIndex streamIndex);
     // Events
     void createCudaEvents();
     void destroyCudaEvents();
 
-    void triggerStartBulkKernel(CudaStreamIndex streamIndex, uint multiStreamIndex=0);
-    void waitOnStartBulkKernelEvent(CudaStreamIndex streamIndex, uint multiStreamIndex=0);
+    void triggerStartBulkKernel(CudaStreamIndex streamIndex, uint multiStreamIndex = 0);
+    void waitOnStartBulkKernelEvent(CudaStreamIndex streamIndex, uint multiStreamIndex = 0);
 };
 
 #endif
diff --git a/src/gpu/VirtualFluids_GPU/Parameter/Parameter.cpp b/src/gpu/VirtualFluids_GPU/Parameter/Parameter.cpp
index e75acaba0adceb7fdc69c34b8f71054b5da6b8e6..9044f69dccdc848192f62b02c737b8ec961d4ba8 100644
--- a/src/gpu/VirtualFluids_GPU/Parameter/Parameter.cpp
+++ b/src/gpu/VirtualFluids_GPU/Parameter/Parameter.cpp
@@ -1721,7 +1721,7 @@ unsigned int Parameter::getOutputCount()
 {
     return this->outputCount;
 }
-unsigned int Parameter::getlimitOfNodesForVTK()
+unsigned int Parameter::getLimitOfNodesForVTK()
 {
     return this->limitOfNodesForVTK;
 }
diff --git a/src/gpu/VirtualFluids_GPU/Parameter/Parameter.h b/src/gpu/VirtualFluids_GPU/Parameter/Parameter.h
index 5944cf66caed4f680ff0480c7b7c39ff7d237aab..89398e063027fe062cce240e397eff59a67732c2 100644
--- a/src/gpu/VirtualFluids_GPU/Parameter/Parameter.h
+++ b/src/gpu/VirtualFluids_GPU/Parameter/Parameter.h
@@ -700,7 +700,7 @@ public:
     real getEndXHotWall();
     unsigned int getStepEnsight();
     unsigned int getOutputCount();
-    unsigned int getlimitOfNodesForVTK();
+    unsigned int getLimitOfNodesForVTK();
     unsigned int getStartTurn();
     bool getEvenOrOdd(int level);
     bool getDiffOn();
diff --git a/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/ActuatorFarm.cu b/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/ActuatorFarm.cu
index bcdd63657d13cd8a9dcef3372fe02760a337b057..f2d4b27a159c3f3687bd58933b55558ca08cd16d 100644
--- a/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/ActuatorFarm.cu
+++ b/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/ActuatorFarm.cu
@@ -310,7 +310,7 @@ void ActuatorFarm::init(Parameter* para, GridProvider* gridProvider, CudaMemoryM
     this->initBladeVelocities(cudaMemoryManager);
     this->initBladeForces(cudaMemoryManager);
     this->initBoundingSpheres(para, cudaMemoryManager);
-    this->streamIndex = 0;
+    this->streamIndex = para->getStreamManager()->registerAndLaunchStream(CudaStreamIndex::ActuatorFarm);
 }
 
 void ActuatorFarm::interact(Parameter* para, CudaMemoryManager* cudaMemoryManager, int level, unsigned int t)
diff --git a/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/PreCollisionInteractor.h b/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/PreCollisionInteractor.h
index 811045a32b18fd0f5d7f71be39b0dfec8982b352..90c434bd60e623175d5c8bfbbf23fa41f01e8a37 100644
--- a/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/PreCollisionInteractor.h
+++ b/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/PreCollisionInteractor.h
@@ -16,8 +16,6 @@ class CudaMemoryManager;
 
 class VIRTUALFLUIDS_GPU_EXPORT PreCollisionInteractor
 {
-private:
-    SPtr<Parameter> para;
 
 protected:
     PreCollisionInteractor()
diff --git a/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/PrecursorWriter.cu b/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/PrecursorWriter.cu
index 99c60fd3d2aae2e796e0c95e624b9d5d33c30ef1..b1ebaf28edc7966074f7cc96e31bf8489ca8e4a9 100644
--- a/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/PrecursorWriter.cu
+++ b/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/PrecursorWriter.cu
@@ -208,7 +208,7 @@ void PrecursorWriter::init(Parameter* para, GridProvider* gridProvider, CudaMemo
         precursorStructs[level]->origin = makeUbTuple(lowestY, lowestZ);
         precursorStructs[level]->extent = makeUbTuple(0, ny-1, 0, nz-1);
         precursorStructs[level]->numberOfPointsInData = ny*nz;
-        precursorStructs[level]->numberOfTimestepsPerFile = min(para->getlimitOfNodesForVTK()/(ny*nz), maxtimestepsPerFile);
+        precursorStructs[level]->numberOfTimestepsPerFile = min(para->getLimitOfNodesForVTK()/(ny*nz), maxtimestepsPerFile);
         precursorStructs[level]->numberOfFilesWritten = 0;
         precursorStructs[level]->numberOfTimestepsBuffered = 0;
         
diff --git a/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/PlanarAverageProbe.cu b/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/PlanarAverageProbe.cu
index e89d392b5d4bf5983f9bb47642fef81d0f06cc89..705b7173606d1956c50c59d5cc1f7635a4b7883b 100644
--- a/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/PlanarAverageProbe.cu
+++ b/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/PlanarAverageProbe.cu
@@ -16,9 +16,11 @@
 #include "DataStructureInitializer/GridProvider.h"
 #include "GPU/CudaMemoryManager.h"
 #include "GPU/GPU_Interface.h"
+#include "basics/constants/NumericConstants.h"
 
 #include <algorithm>
 
+using namespace vf::basics::constant;
 ///////////////////////////////////////////////////////////////////////////////////
 /// Functors for thrust reductions
 ///////////////////////////////////////////////////////////////////////////////////
@@ -307,7 +309,7 @@ void PlanarAverageProbe::calculateQuantities(SPtr<ProbeStruct> probeStruct, Para
         neighborInplane2 = para->getParD(level)->neighborY;
     }
 
-    bool doTmpAveraging = t_level>=(this->getTStartTmpAveraging()*pow(2,level));
+    bool doTmpAveraging = t_level>=(this->getTStartTmpAveraging()*exp2(level));
 
     // Pointer casts to use device arrays in thrust reductions
     thrust::device_ptr<uint> indices_thrust = thrust::device_pointer_cast(probeStruct->pointIndicesD);
@@ -317,7 +319,7 @@ void PlanarAverageProbe::calculateQuantities(SPtr<ProbeStruct> probeStruct, Para
     thrust::device_ptr<real> nut_thrust = thrust::device_pointer_cast(para->getParD(level)->turbViscosity);
 
     real N = (real)probeStruct->nIndices;
-    real n = (real)probeStruct->vals;
+    real invNumberOfTimestepsInTmpAvg = c1o1/real(probeStruct->timestepInTimeAverage+1);
     uint nPoints = probeStruct->nPoints;
     // Permutation iterators for direct iteration over the velocities of the planes
     typedef thrust::device_vector<real>::iterator valIterator;
@@ -353,17 +355,17 @@ void PlanarAverageProbe::calculateQuantities(SPtr<ProbeStruct> probeStruct, Para
 
             if(probeStruct->quantitiesH[int(Statistic::SpatioTemporalMeans)] && doTmpAveraging)
             {
-            uint arrOff = probeStruct->arrayOffsetsH[int(Statistic::SpatioTemporalMeans)];
-            real spatTmpMean_vx_old = probeStruct->quantitiesArrayH[(arrOff+0)*nPoints+node];
-            real spatTmpMean_vy_old = probeStruct->quantitiesArrayH[(arrOff+1)*nPoints+node];
-            real spatTmpMean_vz_old = probeStruct->quantitiesArrayH[(arrOff+2)*nPoints+node];
-            real spatTmpMean_nut_old;
-            if(para->getUseTurbulentViscosity()) spatTmpMean_nut_old = probeStruct->quantitiesArrayH[(arrOff+3)*nPoints+node];;
-
-            probeStruct->quantitiesArrayH[(arrOff+0)*nPoints+node] += (spatMean_vx-spatTmpMean_vx_old)/n;
-            probeStruct->quantitiesArrayH[(arrOff+1)*nPoints+node] += (spatMean_vy-spatTmpMean_vy_old)/n;
-            probeStruct->quantitiesArrayH[(arrOff+2)*nPoints+node] += (spatMean_vz-spatTmpMean_vz_old)/n;
-            if(para->getUseTurbulentViscosity()) probeStruct->quantitiesArrayH[(arrOff+3)*nPoints+node] += (spatMean_nut-spatTmpMean_nut_old)/n;
+                uint arrOff = probeStruct->arrayOffsetsH[int(Statistic::SpatioTemporalMeans)];
+                real spatTmpMean_vx_old = probeStruct->quantitiesArrayH[(arrOff+0)*nPoints+node];
+                real spatTmpMean_vy_old = probeStruct->quantitiesArrayH[(arrOff+1)*nPoints+node];
+                real spatTmpMean_vz_old = probeStruct->quantitiesArrayH[(arrOff+2)*nPoints+node];
+                real spatTmpMean_nut_old;
+                if(para->getUseTurbulentViscosity()) spatTmpMean_nut_old = probeStruct->quantitiesArrayH[(arrOff+3)*nPoints+node];;
+
+                probeStruct->quantitiesArrayH[(arrOff+0)*nPoints+node] += (spatMean_vx-spatTmpMean_vx_old)*invNumberOfTimestepsInTmpAvg;
+                probeStruct->quantitiesArrayH[(arrOff+1)*nPoints+node] += (spatMean_vy-spatTmpMean_vy_old)*invNumberOfTimestepsInTmpAvg;
+                probeStruct->quantitiesArrayH[(arrOff+2)*nPoints+node] += (spatMean_vz-spatTmpMean_vz_old)*invNumberOfTimestepsInTmpAvg;
+                if(para->getUseTurbulentViscosity()) probeStruct->quantitiesArrayH[(arrOff+3)*nPoints+node] += (spatMean_nut-spatTmpMean_nut_old)*invNumberOfTimestepsInTmpAvg;
 
             }
         
@@ -400,12 +402,12 @@ void PlanarAverageProbe::calculateQuantities(SPtr<ProbeStruct> probeStruct, Para
                     real spatTmpMean_vxvz_old = probeStruct->quantitiesArrayH[(arrOff+4)*nPoints+node];
                     real spatTmpMean_vyvz_old = probeStruct->quantitiesArrayH[(arrOff+5)*nPoints+node];
 
-                    probeStruct->quantitiesArrayH[(arrOff+0)*nPoints+node] += (spatMean_vxvx-spatTmpMean_vxvx_old)/n;
-                    probeStruct->quantitiesArrayH[(arrOff+1)*nPoints+node] += (spatMean_vyvy-spatTmpMean_vyvy_old)/n;
-                    probeStruct->quantitiesArrayH[(arrOff+2)*nPoints+node] += (spatMean_vzvz-spatTmpMean_vzvz_old)/n;
-                    probeStruct->quantitiesArrayH[(arrOff+3)*nPoints+node] += (spatMean_vxvy-spatTmpMean_vxvy_old)/n;
-                    probeStruct->quantitiesArrayH[(arrOff+4)*nPoints+node] += (spatMean_vxvz-spatTmpMean_vxvz_old)/n;
-                    probeStruct->quantitiesArrayH[(arrOff+5)*nPoints+node] += (spatMean_vyvz-spatTmpMean_vyvz_old)/n;
+                    probeStruct->quantitiesArrayH[(arrOff+0)*nPoints+node] += (spatMean_vxvx-spatTmpMean_vxvx_old)*invNumberOfTimestepsInTmpAvg;
+                    probeStruct->quantitiesArrayH[(arrOff+1)*nPoints+node] += (spatMean_vyvy-spatTmpMean_vyvy_old)*invNumberOfTimestepsInTmpAvg;
+                    probeStruct->quantitiesArrayH[(arrOff+2)*nPoints+node] += (spatMean_vzvz-spatTmpMean_vzvz_old)*invNumberOfTimestepsInTmpAvg;
+                    probeStruct->quantitiesArrayH[(arrOff+3)*nPoints+node] += (spatMean_vxvy-spatTmpMean_vxvy_old)*invNumberOfTimestepsInTmpAvg;
+                    probeStruct->quantitiesArrayH[(arrOff+4)*nPoints+node] += (spatMean_vxvz-spatTmpMean_vxvz_old)*invNumberOfTimestepsInTmpAvg;
+                    probeStruct->quantitiesArrayH[(arrOff+5)*nPoints+node] += (spatMean_vyvz-spatTmpMean_vyvz_old)*invNumberOfTimestepsInTmpAvg;
                 }
 
                 if(probeStruct->quantitiesH[int(Statistic::SpatialSkewness)])
@@ -435,9 +437,9 @@ void PlanarAverageProbe::calculateQuantities(SPtr<ProbeStruct> probeStruct, Para
                         real spatTmpMean_Sy_old = probeStruct->quantitiesArrayH[(arrOff+1)*nPoints+node];
                         real spatTmpMean_Sz_old = probeStruct->quantitiesArrayH[(arrOff+2)*nPoints+node];
 
-                        probeStruct->quantitiesArrayH[(arrOff+0)*nPoints+node] += (spatMean_Sx-spatTmpMean_Sx_old)/n;
-                        probeStruct->quantitiesArrayH[(arrOff+1)*nPoints+node] += (spatMean_Sy-spatTmpMean_Sy_old)/n;
-                        probeStruct->quantitiesArrayH[(arrOff+2)*nPoints+node] += (spatMean_Sz-spatTmpMean_Sz_old)/n;
+                        probeStruct->quantitiesArrayH[(arrOff+0)*nPoints+node] += (spatMean_Sx-spatTmpMean_Sx_old)*invNumberOfTimestepsInTmpAvg;
+                        probeStruct->quantitiesArrayH[(arrOff+1)*nPoints+node] += (spatMean_Sy-spatTmpMean_Sy_old)*invNumberOfTimestepsInTmpAvg;
+                        probeStruct->quantitiesArrayH[(arrOff+2)*nPoints+node] += (spatMean_Sz-spatTmpMean_Sz_old)*invNumberOfTimestepsInTmpAvg;
                     }
 
                     if(probeStruct->quantitiesH[int(Statistic::SpatialFlatness)])
@@ -464,9 +466,9 @@ void PlanarAverageProbe::calculateQuantities(SPtr<ProbeStruct> probeStruct, Para
                             real spatTmpMean_Fy_old = probeStruct->quantitiesArrayH[(arrOff+1)*nPoints+node];
                             real spatTmpMean_Fz_old = probeStruct->quantitiesArrayH[(arrOff+2)*nPoints+node];
 
-                            probeStruct->quantitiesArrayH[(arrOff+0)*nPoints+node] += (spatMean_Fx-spatTmpMean_Fx_old)/n;
-                            probeStruct->quantitiesArrayH[(arrOff+1)*nPoints+node] += (spatMean_Fy-spatTmpMean_Fy_old)/n;
-                            probeStruct->quantitiesArrayH[(arrOff+2)*nPoints+node] += (spatMean_Fz-spatTmpMean_Fz_old)/n;
+                            probeStruct->quantitiesArrayH[(arrOff+0)*nPoints+node] += (spatMean_Fx-spatTmpMean_Fx_old)*invNumberOfTimestepsInTmpAvg;
+                            probeStruct->quantitiesArrayH[(arrOff+1)*nPoints+node] += (spatMean_Fy-spatTmpMean_Fy_old)*invNumberOfTimestepsInTmpAvg;
+                            probeStruct->quantitiesArrayH[(arrOff+2)*nPoints+node] += (spatMean_Fz-spatTmpMean_Fz_old)*invNumberOfTimestepsInTmpAvg;
                         }
                     }
                 }
diff --git a/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/PlanarAverageProbe.h b/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/PlanarAverageProbe.h
index 3d3533f74501e776f9150c83c9d9101a0be7ecbc..c2370231c0bc341ca9b3980578ce4aadc1b75c7a 100644
--- a/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/PlanarAverageProbe.h
+++ b/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/PlanarAverageProbe.h
@@ -90,6 +90,8 @@ private:
                     std::vector<real>& pointCoordsX_level, std::vector<real>& pointCoordsY_level, std::vector<real>& pointCoordsZ_level,
                     int level) override;
     void calculateQuantities(SPtr<ProbeStruct> probeStruct, Parameter* para, uint t, int level) override;
+    void getTaggedFluidNodes(Parameter *para, GridProvider* gridProvider) override {};
+
 
 private:
     real posX, posY, posZ;
diff --git a/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/PlaneProbe.cu b/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/PlaneProbe.cu
index f55045505bff0e3b5b0b1426be4e9e1a3832d088..19f7f6c62ae7ac83c90fc2a7aff0e286a70063d1 100644
--- a/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/PlaneProbe.cu
+++ b/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/PlaneProbe.cu
@@ -102,10 +102,23 @@ void PlaneProbe::findPoints(Parameter* para, GridProvider* gridProvider, std::ve
 void PlaneProbe::calculateQuantities(SPtr<ProbeStruct> probeStruct, Parameter* para, uint t, int level)
 {
     vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(para->getParH(level)->numberofthreads, probeStruct->nPoints);
-    calcQuantitiesKernel<<<grid.grid, grid.threads>>>(  probeStruct->pointIndicesD, probeStruct->nPoints, probeStruct->vals,
-    para->getParD(level)->velocityX, para->getParD(level)->velocityY, para->getParD(level)->velocityZ, para->getParD(level)->rho, 
-    para->getParD(level)->neighborX, para->getParD(level)->neighborY, para->getParD(level)->neighborZ, 
-    probeStruct->quantitiesD, probeStruct->arrayOffsetsD, probeStruct->quantitiesArrayD);
+    calcQuantitiesKernel<<<grid.grid, grid.threads>>>(  probeStruct->pointIndicesD,
+                                                        probeStruct->nPoints,
+                                                        0,
+                                                        0,
+                                                        probeStruct->timestepInTimeAverage,
+                                                        probeStruct->nTimesteps,
+                                                        para->getParD(level)->velocityX,
+                                                        para->getParD(level)->velocityY,
+                                                        para->getParD(level)->velocityZ,
+                                                        para->getParD(level)->rho,
+                                                        para->getParD(level)->neighborX,
+                                                        para->getParD(level)->neighborY,
+                                                        para->getParD(level)->neighborZ,
+                                                        probeStruct->quantitiesD,
+                                                        probeStruct->arrayOffsetsD,
+                                                        probeStruct->quantitiesArrayD
+                                                        );
 }
 
 void PlaneProbe::getTaggedFluidNodes(Parameter *para, GridProvider* gridProvider)
diff --git a/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/PointProbe.cu b/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/PointProbe.cu
index 89e1f6b87687ed42c079415a5340f1d385c8d62c..19c170608a606227d21c25791776bd3195b16e04 100644
--- a/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/PointProbe.cu
+++ b/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/PointProbe.cu
@@ -103,13 +103,22 @@ void PointProbe::findPoints(Parameter* para, GridProvider* gridProvider, std::ve
 void PointProbe::calculateQuantities(SPtr<ProbeStruct> probeStruct, Parameter* para, uint t, int level)
 {
     vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(para->getParH(level)->numberofthreads, probeStruct->nPoints);
-    interpAndCalcQuantitiesKernel<<<grid.grid, grid.threads>>>(  probeStruct->pointIndicesD, probeStruct->nPoints, probeStruct->vals,
+    int oldTimestepInTimeseries = this->outputTimeSeries ? calcOldTimestep(probeStruct->timestepInTimeseries, probeStruct->lastTimestepInOldTimeseries) : 0;
+    int currentTimestep = this->outputTimeSeries ? probeStruct->timestepInTimeseries : 0;
+    interpAndCalcQuantitiesKernel<<<grid.grid, grid.threads>>>(  probeStruct->pointIndicesD, probeStruct->nPoints, oldTimestepInTimeseries, currentTimestep, probeStruct->timestepInTimeAverage, probeStruct->nTimesteps,
                                                 probeStruct->distXD, probeStruct->distYD, probeStruct->distZD,
                                                 para->getParD(level)->velocityX, para->getParD(level)->velocityY, para->getParD(level)->velocityZ, para->getParD(level)->rho, 
                                                 para->getParD(level)->neighborX, para->getParD(level)->neighborY, para->getParD(level)->neighborZ, 
                                                 probeStruct->quantitiesD, probeStruct->arrayOffsetsD, probeStruct->quantitiesArrayD);
 }
 
+void PointProbe::addProbePoint(real pointCoordX, real pointCoordY, real pointCoordZ)
+{
+    this->pointCoordsX.push_back(pointCoordX);
+    this->pointCoordsY.push_back(pointCoordY);
+    this->pointCoordsZ.push_back(pointCoordZ);
+}
+
 void PointProbe::addProbePointsFromList(std::vector<real>& _pointCoordsX, std::vector<real>& _pointCoordsY, std::vector<real>& _pointCoordsZ)
 {
     bool isSameLength = ( (_pointCoordsX.size()==_pointCoordsY.size()) && (_pointCoordsY.size()==_pointCoordsZ.size()));
diff --git a/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/PointProbe.h b/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/PointProbe.h
index 08c359705f03b20fbd3276fe209b6ff4d782a5e5..b26211dc859bf4601b42a640d25e656b3552300b 100644
--- a/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/PointProbe.h
+++ b/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/PointProbe.h
@@ -50,7 +50,8 @@ public:
         uint _tStartAvg,
         uint _tAvg,
         uint _tStartOut,
-        uint _tOut
+        uint _tOut,
+        bool _outputTimeseries = false
     ): Probe(_probeName, 
              _outputPath,
              _tStartAvg, 
@@ -59,9 +60,10 @@ public:
              _tStartOut, 
              _tOut,
              true,
-             false)
+             _outputTimeseries)
     {}
 
+    void addProbePoint(real pointCoordX, real pointCoordY, real pointCoordZ);
     void addProbePointsFromList(std::vector<real>& _pointCoordsX, std::vector<real>& _pointCoordsY, std::vector<real>& _pointCoordsZ);
     void addProbePointsFromXNormalPlane(real pos_x, real pos0_y, real pos0_z, real pos1_y, real pos1_z, uint n_y, uint n_z);
     void getTaggedFluidNodes(Parameter *para, GridProvider* gridProvider) override;
@@ -80,6 +82,8 @@ private:
 
 private:
     std::vector<real> pointCoordsX, pointCoordsY, pointCoordsZ; 
+    uint getNumberOfTimestepsInTimeseries(Parameter* para, int level) override { return outputTimeSeries ? tOut*exp2(level) : 1; }
+
 
 };
 
diff --git a/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/Probe.cu b/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/Probe.cu
index a7a0e79c0bcbf0f7a9e13e879debfb378e23f69d..42e45a5bb83d41e74e88378da60b142509e09bc9 100644
--- a/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/Probe.cu
+++ b/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/Probe.cu
@@ -47,69 +47,92 @@
 
 using namespace vf::basics::constant;
 
-__device__ void calculatePointwiseQuantities(uint n, real* quantityArray, bool* quantities, uint* quantityArrayOffsets, uint nPoints, uint node, real vx, real vy, real vz, real rho)
+__host__ __device__ int calcArrayIndex(int node, int nNodes, int timestep, int nTimesteps, int array)
+{
+    return node+nNodes*(timestep+nTimesteps*array);
+}
+
+uint calcOldTimestep(uint currentTimestep, uint lastTimestepInOldSeries)
+{
+    return currentTimestep > 0 ? currentTimestep - 1 : lastTimestepInOldSeries; 
+}
+
+__device__ void calculatePointwiseQuantities(
+    uint oldTimestepInTimeseries,
+    uint timestepInTimeseries,
+    uint timestepInAverage,
+    uint nTimesteps,
+    real* quantityArray,
+    bool* quantities,
+    uint* quantityArrayOffsets,
+    uint nPoints,
+    uint node,
+    real vx,
+    real vy,
+    real vz,
+    real rho)
 {
     //"https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Welford's_online_algorithm"
     // also has extensions for higher order and covariances
+    int n = timestepInAverage+1;
     real inv_n = 1/real(n);
 
-    if(quantities[int(Statistic::Instantaneous)])
+    if (quantities[int(Statistic::Instantaneous)])
     {
         uint arrOff = quantityArrayOffsets[int(Statistic::Instantaneous)];
-        quantityArray[(arrOff+0)*nPoints+node] = vx;
-        quantityArray[(arrOff+1)*nPoints+node] = vy;
-        quantityArray[(arrOff+2)*nPoints+node] = vz;
-        quantityArray[(arrOff+3)*nPoints+node] = rho;
+        quantityArray[calcArrayIndex(node, nPoints, timestepInTimeseries, nTimesteps, arrOff + 0)] = vx;
+        quantityArray[calcArrayIndex(node, nPoints, timestepInTimeseries, nTimesteps, arrOff + 1)] = vy;
+        quantityArray[calcArrayIndex(node, nPoints, timestepInTimeseries, nTimesteps, arrOff + 2)] = vz;
+        quantityArray[calcArrayIndex(node, nPoints, timestepInTimeseries, nTimesteps, arrOff + 3)] = rho;
     }
 
-    if(quantities[int(Statistic::Means)])
+    if (quantities[int(Statistic::Means)])
     {
-        
         uint arrOff = quantityArrayOffsets[int(Statistic::Means)];
-        real vx_m_old  = quantityArray[(arrOff+0)*nPoints+node];
-        real vy_m_old  = quantityArray[(arrOff+1)*nPoints+node];
-        real vz_m_old  = quantityArray[(arrOff+2)*nPoints+node];
-        real rho_m_old = quantityArray[(arrOff+3)*nPoints+node];
-
-        real vx_m_new  = ( (n-1)*vx_m_old + vx  )*inv_n;
-        real vy_m_new  = ( (n-1)*vy_m_old + vy  )*inv_n;
-        real vz_m_new  = ( (n-1)*vz_m_old + vz  )*inv_n;
-        real rho_m_new = ( (n-1)*rho_m_old+ rho )*inv_n;
-
-        quantityArray[(arrOff+0)*nPoints+node] = vx_m_new;
-        quantityArray[(arrOff+1)*nPoints+node] = vy_m_new;
-        quantityArray[(arrOff+2)*nPoints+node] = vz_m_new;
-        quantityArray[(arrOff+3)*nPoints+node] = rho_m_new;
-    
-        if(quantities[int(Statistic::Variances)])
+        real vx_m_old  = quantityArray[calcArrayIndex(node, nPoints, oldTimestepInTimeseries, nTimesteps, arrOff + 0)];
+        real vy_m_old  = quantityArray[calcArrayIndex(node, nPoints, oldTimestepInTimeseries, nTimesteps, arrOff + 1)];
+        real vz_m_old  = quantityArray[calcArrayIndex(node, nPoints, oldTimestepInTimeseries, nTimesteps, arrOff + 2)];
+        real rho_m_old = quantityArray[calcArrayIndex(node, nPoints, oldTimestepInTimeseries, nTimesteps, arrOff + 3)];
+
+        real vx_m_new  = ((n - 1) * vx_m_old  + vx)  * inv_n;
+        real vy_m_new  = ((n - 1) * vy_m_old  + vy)  * inv_n;
+        real vz_m_new  = ((n - 1) * vz_m_old  + vz)  * inv_n;
+        real rho_m_new = ((n - 1) * rho_m_old + rho) * inv_n;
+
+        quantityArray[calcArrayIndex(node, nPoints, timestepInTimeseries, nTimesteps, arrOff + 0)] = vx_m_new;
+        quantityArray[calcArrayIndex(node, nPoints, timestepInTimeseries, nTimesteps, arrOff + 1)] = vy_m_new;
+        quantityArray[calcArrayIndex(node, nPoints, timestepInTimeseries, nTimesteps, arrOff + 2)] = vz_m_new;
+        quantityArray[calcArrayIndex(node, nPoints, timestepInTimeseries, nTimesteps, arrOff + 3)] = rho_m_new;
+
+        if (quantities[int(Statistic::Variances)])
         {
             arrOff = quantityArrayOffsets[int(Statistic::Variances)];
 
-            real vx_var_old  = quantityArray[(arrOff+0)*nPoints+node];
-            real vy_var_old  = quantityArray[(arrOff+1)*nPoints+node];
-            real vz_var_old  = quantityArray[(arrOff+2)*nPoints+node];
-            real rho_var_old = quantityArray[(arrOff+3)*nPoints+node];
+            real vx_var_old  = quantityArray[calcArrayIndex(node, nPoints, oldTimestepInTimeseries, nTimesteps, arrOff + 0)];
+            real vy_var_old  = quantityArray[calcArrayIndex(node, nPoints, oldTimestepInTimeseries, nTimesteps, arrOff + 1)];
+            real vz_var_old  = quantityArray[calcArrayIndex(node, nPoints, oldTimestepInTimeseries, nTimesteps, arrOff + 2)];
+            real rho_var_old = quantityArray[calcArrayIndex(node, nPoints, oldTimestepInTimeseries, nTimesteps, arrOff + 3)];
 
-            real vx_var_new  = ( (n-1)*(vx_var_old )+(vx  - vx_m_old )*(vx  - vx_m_new ) )*inv_n;
-            real vy_var_new  = ( (n-1)*(vy_var_old )+(vy  - vy_m_old )*(vy  - vy_m_new ) )*inv_n;
-            real vz_var_new  = ( (n-1)*(vz_var_old )+(vz  - vz_m_old )*(vz  - vz_m_new ) )*inv_n;
-            real rho_var_new = ( (n-1)*(rho_var_old)+(rho - rho_m_old)*(rho - rho_m_new) )*inv_n;
+            real vx_var_new  = ((n - 1) * (vx_var_old)  + (vx  - vx_m_old)  * (vx  - vx_m_new))  * inv_n;
+            real vy_var_new  = ((n - 1) * (vy_var_old)  + (vy  - vy_m_old)  * (vy  - vy_m_new))  * inv_n;
+            real vz_var_new  = ((n - 1) * (vz_var_old)  + (vz  - vz_m_old)  * (vz  - vz_m_new))  * inv_n;
+            real rho_var_new = ((n - 1) * (rho_var_old) + (rho - rho_m_old) * (rho - rho_m_new)) * inv_n;
 
-            quantityArray[(arrOff+0)*nPoints+node] = vx_var_new;
-            quantityArray[(arrOff+1)*nPoints+node] = vy_var_new;
-            quantityArray[(arrOff+2)*nPoints+node] = vz_var_new;
-            quantityArray[(arrOff+3)*nPoints+node] = rho_var_new; 
+            quantityArray[calcArrayIndex(node, nPoints, timestepInTimeseries, nTimesteps, arrOff + 0)] = vx_var_new;
+            quantityArray[calcArrayIndex(node, nPoints, timestepInTimeseries, nTimesteps, arrOff + 1)] = vy_var_new;
+            quantityArray[calcArrayIndex(node, nPoints, timestepInTimeseries, nTimesteps, arrOff + 2)] = vz_var_new;
+            quantityArray[calcArrayIndex(node, nPoints, timestepInTimeseries, nTimesteps, arrOff + 3)] = rho_var_new;
         }
     }
 }
 
 __global__ void calcQuantitiesKernel(   uint* pointIndices,
-                                    uint nPoints, uint n,
+                                    uint nPoints, uint oldTimestepInTimeseries, uint timestepInTimeseries, uint timestepInAverage, uint nTimesteps,
                                     real* vx, real* vy, real* vz, real* rho,            
                                     uint* neighborX, uint* neighborY, uint* neighborZ,
                                     bool* quantities,
                                     uint* quantityArrayOffsets, real* quantityArray
-                                )
+                                    )
 {
     const uint x = threadIdx.x; 
     const uint y = blockIdx.x;
@@ -132,12 +155,12 @@ __global__ void calcQuantitiesKernel(   uint* pointIndices,
     u_interpZ = vz[k];
     rho_interp = rho[k];
 
-    calculatePointwiseQuantities(n, quantityArray, quantities, quantityArrayOffsets, nPoints, node, u_interpX, u_interpY, u_interpZ, rho_interp);
+    calculatePointwiseQuantities(oldTimestepInTimeseries, timestepInTimeseries, timestepInAverage, nTimesteps, quantityArray, quantities, quantityArrayOffsets, nPoints, node, u_interpX, u_interpY, u_interpZ, rho_interp);
 
 }
 
 __global__ void interpAndCalcQuantitiesKernel(   uint* pointIndices,
-                                    uint nPoints, uint n,
+                                    uint nPoints, uint oldTimestepInTimeseries, uint timestepInTimeseries, uint timestepInAverage, uint nTimesteps,
                                     real* distX, real* distY, real* distZ,
                                     real* vx, real* vy, real* vz, real* rho,            
                                     uint* neighborX, uint* neighborY, uint* neighborZ,
@@ -173,7 +196,7 @@ __global__ void interpAndCalcQuantitiesKernel(   uint* pointIndices,
     u_interpZ  = trilinearInterpolation( dW, dE, dN, dS, dT, dB, k, ke, kn, kt, kne, kte, ktn, ktne, vz );
     rho_interp = trilinearInterpolation( dW, dE, dN, dS, dT, dB, k, ke, kn, kt, kne, kte, ktn, ktne, rho );
 
-    calculatePointwiseQuantities(n, quantityArray, quantities, quantityArrayOffsets, nPoints, node, u_interpX, u_interpY, u_interpZ, rho_interp);
+    calculatePointwiseQuantities(oldTimestepInTimeseries, timestepInTimeseries, timestepInAverage, nTimesteps, quantityArray, quantities, quantityArrayOffsets, nPoints, node, u_interpX, u_interpY, u_interpZ, rho_interp);
 
 }
 
@@ -207,26 +230,28 @@ void Probe::init(Parameter* para, GridProvider* gridProvider, CudaMemoryManager*
                        pointCoordsX_level, pointCoordsY_level, pointCoordsZ_level,
                        level);
         
-        this->addProbeStruct(cudaMemoryManager, probeIndices_level, 
+        this->addProbeStruct(para, cudaMemoryManager, probeIndices_level, 
                             distX_level, distY_level, distZ_level, 
                             pointCoordsX_level, pointCoordsY_level, pointCoordsZ_level, 
                             level);
+
+        if(this->outputTimeSeries) timeseriesFileNames.push_back(this->writeTimeseriesHeader(para, level));
     }
 }
 
-void Probe::addProbeStruct(CudaMemoryManager* cudaMemoryManager, std::vector<int>& probeIndices,
-                                      std::vector<real>& distX, std::vector<real>& distY, std::vector<real>& distZ,   
-                                      std::vector<real>& pointCoordsX, std::vector<real>& pointCoordsY, std::vector<real>& pointCoordsZ,
-                                      int level)
+void Probe::addProbeStruct( Parameter* para, CudaMemoryManager* cudaMemoryManager, std::vector<int>& probeIndices,
+                            std::vector<real>& distX, std::vector<real>& distY, std::vector<real>& distZ,   
+                            std::vector<real>& pointCoordsX, std::vector<real>& pointCoordsY, std::vector<real>& pointCoordsZ,
+                            int level)
 {
     probeParams[level] = SPtr<ProbeStruct>(new ProbeStruct);
-    probeParams[level]->vals = 1;
+    probeParams[level]->nTimesteps = this->getNumberOfTimestepsInTimeseries(para, level);
     probeParams[level]->nPoints  = uint(pointCoordsX.size()); // Note, need to have both nPoints and nIndices because they differ in PlanarAverage
     probeParams[level]->nIndices = uint(probeIndices.size());
 
-    probeParams[level]->pointCoordsX = (real*)malloc(probeParams[level]->nPoints*sizeof(real));
-    probeParams[level]->pointCoordsY = (real*)malloc(probeParams[level]->nPoints*sizeof(real));
-    probeParams[level]->pointCoordsZ = (real*)malloc(probeParams[level]->nPoints*sizeof(real));
+    probeParams[level]->pointCoordsX = (real*)malloc(probeParams[level]->nPoints * sizeof(real));
+    probeParams[level]->pointCoordsY = (real*)malloc(probeParams[level]->nPoints * sizeof(real));
+    probeParams[level]->pointCoordsZ = (real*)malloc(probeParams[level]->nPoints * sizeof(real));
 
     std::copy(pointCoordsX.begin(), pointCoordsX.end(), probeParams[level]->pointCoordsX);
     std::copy(pointCoordsY.begin(), pointCoordsY.end(), probeParams[level]->pointCoordsY);
@@ -267,39 +292,57 @@ void Probe::addProbeStruct(CudaMemoryManager* cudaMemoryManager, std::vector<int
 
     cudaMemoryManager->cudaAllocProbeQuantityArray(this, level);
 
-    for(uint arr=0; arr<probeParams[level]->nArrays; arr++)
-    {
-        for( uint point=0; point<probeParams[level]->nPoints; point++)
-        {
-            probeParams[level]->quantitiesArrayH[arr*probeParams[level]->nPoints+point] = 0.0f;
-        }
-    }
+    std::fill_n(probeParams[level]->quantitiesArrayH, probeParams[level]->nArrays*probeParams[level]->nPoints*probeParams[level]->nTimesteps, c0o1);
+
     if(this->hasDeviceQuantityArray)
         cudaMemoryManager->cudaCopyProbeQuantityArrayHtoD(this, level);
+
 }
 
 void Probe::interact(Parameter* para, CudaMemoryManager* cudaMemoryManager, int level, uint t)
 {
     uint t_level = para->getTimeStep(level, t, false);
 
+    SPtr<ProbeStruct> probeStruct = this->getProbeStruct(level);
+
+    //!Skip empty probes
+    if(probeStruct->nPoints==0) return;
+
     //! if tAvg==1 the probe will be evaluated in every sub-timestep of each respective level
     //! else, the probe will only be evaluated in each synchronous time step tAvg
 
-    uint tAvg_level = this->tAvg==1? this->tAvg: this->tAvg*pow(2,level);          
+    uint level_coefficient = exp2(level);
 
-    if(max(int(t_level) - int(this->tStartAvg*pow(2,level)), -1) % tAvg_level==0)
+    uint tAvg_level = this->tAvg == 1 ? this->tAvg : this->tAvg * level_coefficient;
+    uint tOut_level = this->tOut * level_coefficient;
+    uint tStartOut_level = this->tStartOut * level_coefficient;
+    uint tStartAvg_level = this->tStartAvg * level_coefficient;
+
+    uint tAfterStartAvg = t_level - tStartAvg_level;
+    uint tAfterStartOut = t_level - tStartOut_level;
+
+    if( (t > this->tStartAvg) && (tAfterStartAvg % tAvg_level == 0))
     {
-        SPtr<ProbeStruct> probeStruct = this->getProbeStruct(level);
         this->calculateQuantities(probeStruct, para, t_level, level);
-        if(t_level>=(this->tStartTmpAveraging*pow(2,level))) probeStruct->vals++;
+
+        if(t > this->tStartTmpAveraging) probeStruct->timestepInTimeAverage++;
+        if(this->outputTimeSeries && (t_level >= tStartOut_level)) probeStruct->timestepInTimeseries++;
     }
 
     //! output only in synchronous timesteps
-    if(max(int(t_level) - int(this->tStartOut*pow(2,level)), -1) % int(this->tOut*pow(2,level)) == 0)
+    if( (t > this->tStartOut) && (tAfterStartOut % tOut_level == 0) )
     {   
         if(this->hasDeviceQuantityArray)
             cudaMemoryManager->cudaCopyProbeQuantityArrayDtoH(this, level);
         this->write(para, level, t);
+        
+        if(level == 0&& !this->outputTimeSeries) this->writeParallelFile(para, t);
+
+        if(this->outputTimeSeries)
+        {
+            probeStruct->lastTimestepInOldTimeseries = probeStruct->timestepInTimeseries > 0 ? probeStruct->timestepInTimeseries - 1: 0;
+            probeStruct->timestepInTimeseries = 0;
+        }
     }
 }
 
@@ -315,11 +358,6 @@ void Probe::free(Parameter* para, CudaMemoryManager* cudaMemoryManager)
     }
 }
 
-void Probe::getTaggedFluidNodes(Parameter *para, GridProvider* gridProvider)
-{
-    // Do nothing
-};
-
 
 void Probe::addStatistic(Statistic variable)
 {
@@ -335,22 +373,32 @@ void Probe::addStatistic(Statistic variable)
     }
 }
 
+template<typename T>
+std::string nameComponent(std::string name, T value)
+{
+    return "_" + name + "_" + StringUtil::toString<T>(value); 
+}
+
 std::string Probe::makeParallelFileName(int id, int t)
 {
-    return this->probeName + "_bin_ID_" + StringUtil::toString<int>(id) 
-                                           + "_t_" + StringUtil::toString<int>(t) 
-                                           + ".vtk";
+    return this->probeName + "_bin" + nameComponent<int>("ID", id) + nameComponent<int>("t", t) + ".vtk"; 
+
 }
 
 std::string Probe::makeGridFileName(int level, int id, int t, uint part)
 {
-    return this->probeName + "_bin_lev_" + StringUtil::toString<int>(level)
-                                         + "_ID_" + StringUtil::toString<int>(id)
-                                         + "_Part_" + StringUtil::toString<int>(part) 
-                                         + "_t_" + StringUtil::toString<int>(t) 
-                                         + ".vtk";
+    return this->probeName + "_bin" + nameComponent<int>("lev", level)
+                                    + nameComponent<int>("ID", id)
+                                    + nameComponent<int>("part", part)
+                                    + nameComponent<int>("t", t) + ".vtk";
 }
 
+std::string Probe::makeTimeseriesFileName(int level, int id)
+{
+    return this->probeName + "_timeseries" + nameComponent<int>("lev", level)
+                                    + nameComponent<int>("ID", id)
+                                    + ".txt";
+}
 void Probe::addAllAvailableStatistics()
 {
     for( int var=0; var < int(Statistic::LAST); var++)
@@ -362,16 +410,23 @@ void Probe::addAllAvailableStatistics()
 
 void Probe::write(Parameter* para, int level, int t)
 {
-    int t_write = this->fileNameLU ? t: t/this->tOut; 
+    if(this->outputTimeSeries)
+    {
+        this->appendTimeseriesFile(para, level, t);
+    }
+    else
+    {
+        int t_write = this->fileNameLU ? t: t/this->tOut; 
 
-    const uint numberOfParts = this->getProbeStruct(level)->nPoints / para->getlimitOfNodesForVTK() + 1;
+        const uint numberOfParts = this->getProbeStruct(level)->nPoints / para->getLimitOfNodesForVTK() + 1;
 
-    std::vector<std::string> fnames;
-    for (uint i = 1; i <= numberOfParts; i++)
-	{
-        this->writeGridFile(para, level, t_write, i);
+        std::vector<std::string> fnames;
+        for (uint i = 1; i <= numberOfParts; i++)
+        {
+            this->writeGridFile(para, level, t_write, i);
+        }
     }
-    if(level == 0&& !this->outputTimeSeries) this->writeParallelFile(para, t);
+
 }
 
 void Probe::writeParallelFile(Parameter* para, int t)
@@ -398,8 +453,8 @@ void Probe::writeGridFile(Parameter* para, int level, int t, uint part)
 
     SPtr<ProbeStruct> probeStruct = this->getProbeStruct(level);
 
-    uint startpos = (part-1) * para->getlimitOfNodesForVTK();
-    uint sizeOfNodes = min(para->getlimitOfNodesForVTK(), probeStruct->nPoints - startpos);
+    uint startpos = (part-1) * para->getLimitOfNodesForVTK();
+    uint sizeOfNodes = min(para->getLimitOfNodesForVTK(), probeStruct->nPoints - startpos);
     uint endpos = startpos + sizeOfNodes;
 
     //////////////////////////////////////////////////////////////////////////
@@ -414,25 +469,28 @@ void Probe::writeGridFile(Parameter* para, int level, int t, uint part)
 
     for( auto it=nodedata.begin(); it!=nodedata.end(); it++) it->resize(sizeOfNodes);
 
-    for( int var=0; var < int(Statistic::LAST); var++){           
+    uint arrLen = probeStruct->nPoints;
+    int nTimesteps = probeStruct->nTimesteps;
+    int timestep = probeStruct->timestepInTimeseries;
+
+    for( int var=0; var < int(Statistic::LAST); var++)
+    {           
         if(this->quantities[var])
         {
-            Statistic statistic = static_cast<Statistic>(var);
-            real coeff;
 
+            Statistic statistic = static_cast<Statistic>(var);
             std::vector<PostProcessingVariable> postProcessingVariables = this->getPostProcessingVariables(statistic);
             uint n_arrs = uint(postProcessingVariables.size());
 
             uint arrOff = probeStruct->arrayOffsetsH[var];
-            uint arrLen = probeStruct->nPoints;
 
             for(uint arr=0; arr<n_arrs; arr++)
             {
-                coeff = postProcessingVariables[arr].conversionFactor(level);
+                real coeff = postProcessingVariables[arr].conversionFactor(level);
                 
                 for (uint pos = startpos; pos < endpos; pos++)
                 {
-                    nodedata[arrOff+arr][pos-startpos] = double(probeStruct->quantitiesArrayH[(arrOff+arr)*arrLen+pos]*coeff);
+                    nodedata[arrOff+arr][pos-startpos] = double(probeStruct->quantitiesArrayH[calcArrayIndex(pos, arrLen, timestep, nTimesteps, arrOff+arr)]*coeff);
                 }
             }
         }
@@ -441,6 +499,91 @@ void Probe::writeGridFile(Parameter* para, int level, int t, uint part)
     this->fileNamesForCollectionFile.push_back(fullName.substr(fullName.find_last_of('/') + 1));
 }
 
+std::string Probe::writeTimeseriesHeader(Parameter* para, int level)
+{
+/*
+File Layout:
+TimeseriesOutput
+Quantities: Quant1 Quant2 Quant3
+Positions:
+point1.x, point1.y, point1.z
+point2.x, point2.y, point2.z
+...
+t0 point1.quant1 point2.quant1 ... point1.quant2 point2.quant2 ...
+t1 point1.quant1 point2.quant1 ... point1.quant2 point2.quant2 ...
+*/
+    auto probeStruct = this->getProbeStruct(level);
+    std::string fname = this->outputPath + "/" + this->makeTimeseriesFileName(level, para->getMyProcessID());
+    std::ofstream out(fname.c_str(), std::ios::out | std::ios::binary);
+
+    if(!out.is_open()) throw std::runtime_error("Could not open timeseries file!");
+
+    out << "TimeseriesOutput \n";
+    out << "Quantities: ";
+    for(std::string name : getVarNames())
+        out << name << ", ";
+    out << "\n";
+    out << "Number of points in this file: \n";
+    out << probeStruct->nPoints << "\n";
+    out << "Positions: x, y, z\n";
+    for( uint i=0; i<probeStruct->nPoints; i++)
+        out << probeStruct->pointCoordsX[i] << ", " << probeStruct->pointCoordsY[i] << ", " << probeStruct->pointCoordsZ[i] << "\n";
+
+    out.close();
+
+    return fname;
+}
+
+void Probe::appendTimeseriesFile(Parameter* para, int level, int t)
+{
+    std::ofstream out(this->timeseriesFileNames[level], std::ios::app | std::ios::binary);
+
+    uint t_level = para->getTimeStep(level, t, false);
+    uint tAvg_level = this->tAvg==1 ? this->tAvg: this->tAvg*exp2(-level);
+
+    real dt = para->getTimeRatio()*tAvg_level;
+    auto probeStruct = this->getProbeStruct(level);
+
+    real t_start = ( t-this->tOut )*para->getTimeRatio();
+
+    int vals_per_timestep = probeStruct->nPoints*probeStruct->nArrays+1;
+
+    real* timestep_array = (real*) malloc(vals_per_timestep*sizeof(real));
+
+    for(uint timestep=0; timestep<probeStruct->timestepInTimeseries; timestep++)
+    {
+        int val = 0;
+        timestep_array[val] = t_start+timestep*dt;
+        val++;
+
+        for( int var=0; var < int(Statistic::LAST); var++)
+        {           
+            if(!this->quantities[var]) continue;
+            
+            Statistic statistic = static_cast<Statistic>(var);
+            std::vector<PostProcessingVariable> postProcessingVariables = this->getPostProcessingVariables(statistic);
+            uint n_arrs = uint(postProcessingVariables.size());
+
+            uint arrOff = probeStruct->arrayOffsetsH[var];
+
+            for(uint arr=0; arr<n_arrs; arr++)
+            {
+                real coeff = postProcessingVariables[arr].conversionFactor(level);
+                for(uint point=0; point<probeStruct->nPoints; point++)
+                {
+                    timestep_array[val] = probeStruct->quantitiesArrayH[calcArrayIndex(point, probeStruct->nPoints, timestep, probeStruct->nTimesteps, arrOff+arr)]*coeff;
+                    val++;
+                }
+            }
+            
+        }
+        out.write((char*) timestep_array, sizeof(real)*vals_per_timestep);
+    }
+    out.close();
+}
+
+
+
 std::vector<std::string> Probe::getVarNames()
 {
     std::vector<std::string> varNames;
diff --git a/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/Probe.h b/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/Probe.h
index aaf294e87d23c64707a16692b9337d6e9ff9c896..c7391e2a3d6b858facfd4fba094d7942f2bf4505 100644
--- a/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/Probe.h
+++ b/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/Probe.h
@@ -100,7 +100,11 @@ typedef struct PostProcessingVariable{
 } PostProcessingVariable;
 
 struct ProbeStruct{
-    uint nPoints, nIndices, nArrays, vals;
+    uint nPoints, nIndices, nArrays;
+    uint nTimesteps=1;
+    uint timestepInTimeseries=0;
+    uint timestepInTimeAverage=0;
+    uint lastTimestepInOldTimeseries=0;
     uint *pointIndicesH, *pointIndicesD;
     real *pointCoordsX, *pointCoordsY, *pointCoordsZ;
     bool hasDistances=false;
@@ -110,9 +114,10 @@ struct ProbeStruct{
     uint *arrayOffsetsH, *arrayOffsetsD;
     bool isEvenTAvg = true;
 };
+__host__ __device__ int calcArrayIndex(int node, int nNodes, int timestep, int nTimesteps, int array);
 
 __global__ void calcQuantitiesKernel(   uint* pointIndices,
-                                    uint nPoints, uint n,
+                                    uint nPoints, uint oldTimestepInTimeseries, uint timestepInTimeseries, uint timestepInAverage, uint nTimesteps,
                                     real* vx, real* vy, real* vz, real* rho,            
                                     uint* neighborX, uint* neighborY, uint* neighborZ,
                                     bool* quantities,
@@ -120,7 +125,7 @@ __global__ void calcQuantitiesKernel(   uint* pointIndices,
                                 );
 
 __global__ void interpAndCalcQuantitiesKernel(   uint* pointIndices,
-                                    uint nPoints, uint n,
+                                    uint nPoints, uint oldTimestepInTimeseries, uint timestepInTimeseries, uint timestepInAverage, uint nTimesteps,
                                     real* distX, real* distY, real* distZ,
                                     real* vx, real* vy, real* vz, real* rho,            
                                     uint* neighborX, uint* neighborY, uint* neighborZ,
@@ -128,6 +133,7 @@ __global__ void interpAndCalcQuantitiesKernel(   uint* pointIndices,
                                     uint* quantityArrayOffsets, real* quantityArray
                                 );
 
+uint calcOldTimestep(uint currentTimestep, uint lastTimestepInOldSeries);
 
 class Probe : public PreCollisionInteractor 
 {
@@ -135,13 +141,13 @@ public:
     Probe(
         const std::string _probeName,
         const std::string _outputPath,
-        uint _tStartAvg,
-        uint _tStartTmpAvg,
-        uint _tAvg,
-        uint _tStartOut,
-        uint _tOut,
-        bool _hasDeviceQuantityArray,
-        bool _outputTimeSeries
+        const uint _tStartAvg,
+        const uint _tStartTmpAvg,
+        const uint _tAvg,
+        const uint _tStartOut,
+        const uint _tOut,
+        const bool _hasDeviceQuantityArray,
+        const bool _outputTimeSeries
     ):  probeName(_probeName),
         outputPath(_outputPath),
         tStartAvg(_tStartAvg),
@@ -153,13 +159,12 @@ public:
         outputTimeSeries(_outputTimeSeries),        
         PreCollisionInteractor()
     {
-        if (_tStartOut<_tStartAvg)      throw std::runtime_error("Probe: tStartOut must be larger than tStartAvg!");
+        if (tStartOut < tStartAvg)      throw std::runtime_error("Probe: tStartOut must be larger than tStartAvg!");
     }
     
     void init(Parameter* para, GridProvider* gridProvider, CudaMemoryManager* cudaMemoryManager) override;
     void interact(Parameter* para, CudaMemoryManager* cudaMemoryManager, int level, uint t) override;
     void free(Parameter* para, CudaMemoryManager* cudaMemoryManager) override;
-    virtual void getTaggedFluidNodes(Parameter *para, GridProvider* gridProvider) override;
 
     SPtr<ProbeStruct> getProbeStruct(int level){ return this->probeParams[level]; }
 
@@ -170,7 +175,6 @@ public:
     uint getTStartTmpAveraging(){return this->tStartTmpAveraging;}
 
     void setFileNameToNOut(){this->fileNameLU = false;}
-    void setTStartTmpAveraging(uint _tStartTmpAveraging){this->tStartTmpAveraging = _tStartTmpAveraging;}
 
 protected:
     virtual WbWriterVtkXmlBinary* getWriter(){ return WbWriterVtkXmlBinary::getInstance(); };
@@ -185,7 +189,7 @@ private:
                        std::vector<real>& distX_level, std::vector<real>& distY_level, std::vector<real>& distZ_level,      
                        std::vector<real>& pointCoordsX_level, std::vector<real>& pointCoordsY_level, std::vector<real>& pointCoordsZ_level,
                        int level) = 0;
-    void addProbeStruct(CudaMemoryManager* cudaMemoryManager, std::vector<int>& probeIndices,
+    void addProbeStruct(Parameter* para, CudaMemoryManager* cudaMemoryManager, std::vector<int>& probeIndices,
                         std::vector<real>& distX, std::vector<real>& distY, std::vector<real>& distZ,   
                         std::vector<real>& pointCoordsX, std::vector<real>& pointCoordsY, std::vector<real>& pointCoordsZ,
                         int level);
@@ -194,10 +198,15 @@ private:
     virtual void write(Parameter* para, int level, int t);
     virtual void writeParallelFile(Parameter* para, int t);
     virtual void writeGridFile(Parameter* para, int level, int t, uint part);
+    std::string writeTimeseriesHeader(Parameter* para, int level);
+    void appendTimeseriesFile(Parameter* para, int level, int t);
 
     std::vector<std::string> getVarNames();
     std::string makeGridFileName(int level, int id, int t, uint part);
     std::string makeParallelFileName(int id, int t);
+    std::string makeTimeseriesFileName(int leve, int id);
+
+    virtual uint getNumberOfTimestepsInTimeseries(Parameter* para, int level){ return 1; }
 
 protected:
     const std::string probeName;
@@ -205,20 +214,19 @@ protected:
 
     std::vector<SPtr<ProbeStruct>> probeParams;
     bool quantities[int(Statistic::LAST)] = {};
-    bool hasDeviceQuantityArray;    //!> flag initiating memCopy in Point and PlaneProbe. Other probes are only based on thrust reduce functions and therefore dont need explict memCopy in interact()
-    bool outputTimeSeries;          //!> flag initiating overwrite of output vtk files, skipping collection files and limiting the length of the written data to the current time step (currently only used for WallModelProbe)
+    const bool hasDeviceQuantityArray;    //!> flag initiating memCopy in Point and PlaneProbe. Other probes are only based on thrust reduce functions and therefore dont need explict memCopy in interact()
+    const bool outputTimeSeries;        //!> flag initiating time series output in Point and WallModelProbe.
     std::vector<std::string> fileNamesForCollectionFile;
+    std::vector<std::string> timeseriesFileNames;
 
     bool fileNameLU = true; //!> if true, written file name contains time step in LU, else is the number of the written probe files
 
 protected:
-    uint tStartAvg;
-    uint tStartTmpAveraging; //!> only non-zero in PlanarAverageProbe and WallModelProbe to switch on Spatio-temporal averaging (while only doing spatial averaging for t<tStartTmpAveraging) 
-    uint tAvg;  //! for tAvg==1 the probe will be evaluated in every sub-timestep of each respective level, else, the probe will only be evaluated in each synchronous time step 
-    uint tStartOut;
-    uint tOut;
-
-    uint tProbe = 0; //!> counter for number of probe evaluations. Only used when outputting timeseries
+    const uint tStartAvg;
+    const uint tStartTmpAveraging; //!> only non-zero in PlanarAverageProbe and WallModelProbe to switch on Spatio-temporal averaging (while only doing spatial averaging for t<tStartTmpAveraging) 
+    const uint tAvg;  //! for tAvg==1 the probe will be evaluated in every sub-timestep of each respective level, else, the probe will only be evaluated in each synchronous time step 
+    const uint tStartOut;
+    const uint tOut;
 
     std::function<real(int)> velocityRatio;
     std::function<real(int)> densityRatio;
diff --git a/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/WallModelProbe.cu b/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/WallModelProbe.cu
index 3341111c134ace7ca6ff64eeb7f87b38f8014656..f52c666c920a049012888e8e1b71578e68d3da31 100644
--- a/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/WallModelProbe.cu
+++ b/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/WallModelProbe.cu
@@ -10,63 +10,15 @@
 #include <thrust/device_vector.h>
 #include <thrust/reduce.h>
 #include <thrust/device_ptr.h>
-#include <thrust/inner_product.h>
 
 #include "Parameter/Parameter.h"
 #include "DataStructureInitializer/GridProvider.h"
 #include "GPU/CudaMemoryManager.h"
+#include "basics/constants/NumericConstants.h"
 
-
-///////////////////////////////////////////////////////////////////////////////////
-/// Functors for thrust reductions
-///////////////////////////////////////////////////////////////////////////////////
-
-template<typename T>
-struct pow2 : public thrust::unary_function<T,T>
-{
-  __host__ __device__ T operator()(const T &x) const
-  {
-    return x * x;
-  }
-};
-
-template<typename T>
-struct pow3 : public thrust::unary_function<T,T>
-{
-  __host__ __device__ T operator()(const T &x) const
-  {
-    return x * x * x;
-  }
-};
-
-template<typename T>
-struct pow4 : public thrust::unary_function<T,T>
-{
-  __host__ __device__ T operator()(const T &x) const
-  {
-    return x * x * x * x;
-  }
-};
-
-struct nth_moment
-{
-    const float mean;
-    const int n;
-
-    nth_moment(float _mean, int _n) : mean(_mean), n(_n) {}
-
-    __host__ __device__
-        float operator()(const float& x) const { 
-            
-            real fluctuation = x-mean;
-            real moment = fluctuation;
-            for(int i = 1; i<n; i++) moment *= fluctuation;
-            
-            return moment;
-        }
-};
-
-
+using namespace vf::basics::constant;
+typedef thrust::device_vector<real>::iterator valIterator;
+typedef thrust::device_vector<uint>::iterator indIterator;
 ///////////////////////////////////////////////////////////////////////////////////
 bool WallModelProbe::isAvailableStatistic(Statistic _variable)
 {
@@ -156,16 +108,10 @@ void WallModelProbe::findPoints(Parameter* para, GridProvider* gridProvider, std
                             int level)
 {
     if ( !para->getHasWallModelMonitor())                    throw std::runtime_error("WallModelProbe::findPoints(): !para->getHasWallModelMonitor() !");
-
-    real dt = para->getTimeRatio();
-    uint nt = uint((para->getTimestepEnd()-this->tStartAvg)/this->tAvg);
     
-    for(uint t=0; t<nt; t++)
-    {
-        pointCoordsX_level.push_back(dt*(t*this->tAvg)+this->tStartAvg); // x coord will serve as time in this probe
-        pointCoordsY_level.push_back(0);
-        pointCoordsZ_level.push_back(0);
-    }
+    pointCoordsX_level.push_back(0); 
+    pointCoordsY_level.push_back(0);
+    pointCoordsZ_level.push_back(0);
 
     if(this->evaluatePressureGradient)
     {
@@ -183,118 +129,109 @@ void WallModelProbe::findPoints(Parameter* para, GridProvider* gridProvider, std
 
 ///////////////////////////////////////////////////////////////////////////////////
 
+template<typename T>
+T spatial_mean(T* device_pointer, uint numberOfPoints)
+{
+    thrust::device_ptr<T> thrust_pointer = thrust::device_pointer_cast(device_pointer);
+    return thrust::reduce(thrust_pointer, thrust_pointer+numberOfPoints)/real(numberOfPoints);
+}
+
+template<typename T>
+T index_based_spatial_mean(T* device_pointer, thrust::device_ptr<uint> indeces_ptr, uint numberOfIndeces)
+{
+    thrust::device_ptr<T> thrust_pointer = thrust::device_pointer_cast(device_pointer);
+
+    thrust::permutation_iterator<valIterator, indIterator> iter_begin(thrust_pointer, indeces_ptr);
+    thrust::permutation_iterator<valIterator, indIterator> iter_end  (thrust_pointer, indeces_ptr+numberOfIndeces);
+    return thrust::reduce(iter_begin, iter_end)/real(numberOfIndeces);
+}
+
+template<typename T>
+T compute_and_save_mean(T* device_pointer, uint numberOfPoints, T* quantitiesArray, uint timestep, uint numberOfTimesteps, uint indexOfArray)
+{
+    T mean = spatial_mean(device_pointer, numberOfPoints);
+    quantitiesArray[calcArrayIndex(0, 1, timestep, numberOfTimesteps, indexOfArray)] = mean;
+    return mean;
+}
+
+template<typename T>
+T compute_and_save_index_based_mean(T* device_pointer, thrust::device_ptr<uint> indeces_ptr, uint numberOfIndices, T* quantitiesArray, uint timestep, uint numberOfTimesteps, uint indexOfArray)
+{
+    T mean = index_based_spatial_mean(device_pointer, indeces_ptr, numberOfIndices);
+    quantitiesArray[calcArrayIndex(0, 1, timestep, numberOfTimesteps, indexOfArray)] = mean;
+    return mean;
+}
+
+template<typename T>
+void temporal_average(T* quantitiesArray, T currentValue, uint currentTimestep, uint numberOfTimesteps, uint oldTimestep, uint indexOfArray, real invNumberOfAverages)
+{
+    T oldMean = quantitiesArray[calcArrayIndex(0, 1, oldTimestep, numberOfTimesteps, indexOfArray)];
+    quantitiesArray[calcArrayIndex(0, 1, currentTimestep, numberOfTimesteps, indexOfArray)] = oldMean + (currentValue-oldMean)*invNumberOfAverages;
+}
+
 void WallModelProbe::calculateQuantities(SPtr<ProbeStruct> probeStruct, Parameter* para, uint t, int level)
 {   
     bool doTmpAveraging = (t>this->getTStartTmpAveraging());
-    real N = para->getParD(level)->stressBC.numberOfBCnodes;
-    if(N<1) return; //Skipping levels without StressBC
-    real n = (real)probeStruct->vals;
-    int nPoints = probeStruct->nPoints;
-
-    // Pointer casts to use device arrays in thrust reductions
-    thrust::device_ptr<real> u_el_thrust    = thrust::device_pointer_cast(para->getParD(level)->stressBC.Vx);
-    thrust::device_ptr<real> v_el_thrust    = thrust::device_pointer_cast(para->getParD(level)->stressBC.Vy);
-    thrust::device_ptr<real> w_el_thrust    = thrust::device_pointer_cast(para->getParD(level)->stressBC.Vz);
-    thrust::device_ptr<real> u1_thrust      = thrust::device_pointer_cast(para->getParD(level)->stressBC.Vx1);
-    thrust::device_ptr<real> v1_thrust      = thrust::device_pointer_cast(para->getParD(level)->stressBC.Vy1);
-    thrust::device_ptr<real> w1_thrust      = thrust::device_pointer_cast(para->getParD(level)->stressBC.Vz1);
-    thrust::device_ptr<real> u_star_thrust  = thrust::device_pointer_cast(para->getParD(level)->wallModel.u_star);
-    thrust::device_ptr<real> Fx_thrust      = thrust::device_pointer_cast(para->getParD(level)->wallModel.Fx);
-    thrust::device_ptr<real> Fy_thrust      = thrust::device_pointer_cast(para->getParD(level)->wallModel.Fy);
-    thrust::device_ptr<real> Fz_thrust      = thrust::device_pointer_cast(para->getParD(level)->wallModel.Fz);
-    thrust::device_ptr<real> dpdx_thrust    = thrust::device_pointer_cast(para->getParD(level)->forceX_SP);
-    thrust::device_ptr<real> dpdy_thrust    = thrust::device_pointer_cast(para->getParD(level)->forceY_SP);
-    thrust::device_ptr<real> dpdz_thrust    = thrust::device_pointer_cast(para->getParD(level)->forceZ_SP);
+    uint numberOfStressBCPoints = para->getParD(level)->stressBC.numberOfBCnodes;
+    if(numberOfStressBCPoints<1) return; //Skipping levels without StressBC
+    uint timestep = probeStruct->timestepInTimeseries;
+    real inv_n = c1o1/real(probeStruct->timestepInTimeAverage+1);
+    uint oldTimestep = calcOldTimestep(timestep, probeStruct->lastTimestepInOldTimeseries);
 
     thrust::device_ptr<uint> indices_thrust = thrust::device_pointer_cast(probeStruct->pointIndicesD);
-    typedef thrust::device_vector<real>::iterator valIterator;
-    typedef thrust::device_vector<uint>::iterator indIterator;
-    thrust::permutation_iterator<valIterator, indIterator> dpdx_iter_begin(dpdx_thrust, indices_thrust);
-    thrust::permutation_iterator<valIterator, indIterator> dpdx_iter_end  (dpdx_thrust, indices_thrust+probeStruct->nIndices);
-    thrust::permutation_iterator<valIterator, indIterator> dpdy_iter_begin(dpdy_thrust, indices_thrust);
-    thrust::permutation_iterator<valIterator, indIterator> dpdy_iter_end  (dpdy_thrust, indices_thrust+probeStruct->nIndices);
-    thrust::permutation_iterator<valIterator, indIterator> dpdz_iter_begin(dpdz_thrust, indices_thrust);
-    thrust::permutation_iterator<valIterator, indIterator> dpdz_iter_end  (dpdz_thrust, indices_thrust+probeStruct->nIndices);
 
     if(probeStruct->quantitiesH[int(Statistic::SpatialMeans)])
     {
-        // Compute the instantaneous spatial means of the velocity moments 
-        real spatMean_u_el      = thrust::reduce(u_el_thrust, u_el_thrust+N)/N;
-        real spatMean_v_el      = thrust::reduce(v_el_thrust, v_el_thrust+N)/N;
-        real spatMean_w_el      = thrust::reduce(w_el_thrust, w_el_thrust+N)/N;
-        real spatMean_u1        = thrust::reduce(u1_thrust, u1_thrust+N)/N;
-        real spatMean_v1        = thrust::reduce(v1_thrust, v1_thrust+N)/N;
-        real spatMean_w1        = thrust::reduce(w1_thrust, w1_thrust+N)/N;
-        real spatMean_u_star    = thrust::reduce(u_star_thrust, u_star_thrust+N)/N;
-        real spatMean_Fx        = thrust::reduce(Fx_thrust, Fx_thrust+N)/N;
-        real spatMean_Fy        = thrust::reduce(Fy_thrust, Fy_thrust+N)/N;
-        real spatMean_Fz        = thrust::reduce(Fz_thrust, Fz_thrust+N)/N;
-
         uint arrOff = probeStruct->arrayOffsetsH[int(Statistic::SpatialMeans)];
-        probeStruct->quantitiesArrayH[(arrOff+0)*nPoints+tProbe] = spatMean_u_el;
-        probeStruct->quantitiesArrayH[(arrOff+1)*nPoints+tProbe] = spatMean_v_el;
-        probeStruct->quantitiesArrayH[(arrOff+2)*nPoints+tProbe] = spatMean_w_el;
-        probeStruct->quantitiesArrayH[(arrOff+3)*nPoints+tProbe] = spatMean_u1;
-        probeStruct->quantitiesArrayH[(arrOff+4)*nPoints+tProbe] = spatMean_v1;
-        probeStruct->quantitiesArrayH[(arrOff+5)*nPoints+tProbe] = spatMean_w1;
-        probeStruct->quantitiesArrayH[(arrOff+6)*nPoints+tProbe] = spatMean_u_star;
-        probeStruct->quantitiesArrayH[(arrOff+7)*nPoints+tProbe] = spatMean_Fx;
-        probeStruct->quantitiesArrayH[(arrOff+8)*nPoints+tProbe] = spatMean_Fy;
-        probeStruct->quantitiesArrayH[(arrOff+9)*nPoints+tProbe] = spatMean_Fz;
+        // Compute the instantaneous spatial means of the velocity moments 
+        real spatMean_u_el      = compute_and_save_mean(para->getParD(level)->stressBC.Vx     , numberOfStressBCPoints, probeStruct->quantitiesArrayH, timestep, probeStruct->nTimesteps, arrOff+0);
+        real spatMean_v_el      = compute_and_save_mean(para->getParD(level)->stressBC.Vy     , numberOfStressBCPoints, probeStruct->quantitiesArrayH, timestep, probeStruct->nTimesteps, arrOff+1);
+        real spatMean_w_el      = compute_and_save_mean(para->getParD(level)->stressBC.Vz     , numberOfStressBCPoints, probeStruct->quantitiesArrayH, timestep, probeStruct->nTimesteps, arrOff+2);
+        real spatMean_u1        = compute_and_save_mean(para->getParD(level)->stressBC.Vx1    , numberOfStressBCPoints, probeStruct->quantitiesArrayH, timestep, probeStruct->nTimesteps, arrOff+3);
+        real spatMean_v1        = compute_and_save_mean(para->getParD(level)->stressBC.Vy1    , numberOfStressBCPoints, probeStruct->quantitiesArrayH, timestep, probeStruct->nTimesteps, arrOff+4);
+        real spatMean_w1        = compute_and_save_mean(para->getParD(level)->stressBC.Vz1    , numberOfStressBCPoints, probeStruct->quantitiesArrayH, timestep, probeStruct->nTimesteps, arrOff+5);
+        real spatMean_u_star    = compute_and_save_mean(para->getParD(level)->wallModel.u_star, numberOfStressBCPoints, probeStruct->quantitiesArrayH, timestep, probeStruct->nTimesteps, arrOff+6);
+        real spatMean_Fx        = compute_and_save_mean(para->getParD(level)->wallModel.Fx    , numberOfStressBCPoints, probeStruct->quantitiesArrayH, timestep, probeStruct->nTimesteps, arrOff+7);
+        real spatMean_Fy        = compute_and_save_mean(para->getParD(level)->wallModel.Fy    , numberOfStressBCPoints, probeStruct->quantitiesArrayH, timestep, probeStruct->nTimesteps, arrOff+8);
+        real spatMean_Fz        = compute_and_save_mean(para->getParD(level)->wallModel.Fz    , numberOfStressBCPoints, probeStruct->quantitiesArrayH, timestep, probeStruct->nTimesteps, arrOff+9);
 
         real spatMean_dpdx;
         real spatMean_dpdy;
         real spatMean_dpdz;
         if(this->evaluatePressureGradient)
         {
-            real N_fluid = (real)probeStruct->nIndices;
-            spatMean_dpdx      = thrust::reduce(dpdx_iter_begin, dpdx_iter_end)/N_fluid;
-            spatMean_dpdy      = thrust::reduce(dpdy_iter_begin, dpdy_iter_end)/N_fluid;
-            spatMean_dpdz      = thrust::reduce(dpdz_iter_begin, dpdz_iter_end)/N_fluid;
-            probeStruct->quantitiesArrayH[(arrOff+10)*nPoints+tProbe] = spatMean_dpdx;
-            probeStruct->quantitiesArrayH[(arrOff+11)*nPoints+tProbe] = spatMean_dpdy;
-            probeStruct->quantitiesArrayH[(arrOff+12)*nPoints+tProbe] = spatMean_dpdz;
+            spatMean_dpdx = compute_and_save_index_based_mean(para->getParD(level)->forceX_SP, indices_thrust, probeStruct->nIndices, probeStruct->quantitiesArrayH, timestep, probeStruct->nTimesteps, arrOff+10);
+            spatMean_dpdy = compute_and_save_index_based_mean(para->getParD(level)->forceY_SP, indices_thrust, probeStruct->nIndices, probeStruct->quantitiesArrayH, timestep, probeStruct->nTimesteps, arrOff+11);
+            spatMean_dpdz = compute_and_save_index_based_mean(para->getParD(level)->forceZ_SP, indices_thrust, probeStruct->nIndices, probeStruct->quantitiesArrayH, timestep, probeStruct->nTimesteps, arrOff+12);
         }
 
         if(probeStruct->quantitiesH[int(Statistic::SpatioTemporalMeans)] && doTmpAveraging)
         {
             uint arrOff = probeStruct->arrayOffsetsH[int(Statistic::SpatioTemporalMeans)];
-            real spatMean_u_el_old      = probeStruct->quantitiesArrayH[(arrOff+0)*nPoints+tProbe-1];
-            real spatMean_v_el_old      = probeStruct->quantitiesArrayH[(arrOff+1)*nPoints+tProbe-1];
-            real spatMean_w_el_old      = probeStruct->quantitiesArrayH[(arrOff+2)*nPoints+tProbe-1];
-            real spatMean_u1_old        = probeStruct->quantitiesArrayH[(arrOff+3)*nPoints+tProbe-1];
-            real spatMean_v1_old        = probeStruct->quantitiesArrayH[(arrOff+4)*nPoints+tProbe-1];
-            real spatMean_w1_old        = probeStruct->quantitiesArrayH[(arrOff+5)*nPoints+tProbe-1];
-            real spatMean_u_star_old    = probeStruct->quantitiesArrayH[(arrOff+6)*nPoints+tProbe-1];
-            real spatMean_Fx_old        = probeStruct->quantitiesArrayH[(arrOff+7)*nPoints+tProbe-1];
-            real spatMean_Fy_old        = probeStruct->quantitiesArrayH[(arrOff+8)*nPoints+tProbe-1];
-            real spatMean_Fz_old        = probeStruct->quantitiesArrayH[(arrOff+9)*nPoints+tProbe-1];
-
-            probeStruct->quantitiesArrayH[(arrOff+0)*nPoints+tProbe] = spatMean_u_el_old + (spatMean_u_el-spatMean_u_el_old)/n;
-            probeStruct->quantitiesArrayH[(arrOff+1)*nPoints+tProbe] = spatMean_v_el_old + (spatMean_v_el-spatMean_v_el_old)/n;
-            probeStruct->quantitiesArrayH[(arrOff+2)*nPoints+tProbe] = spatMean_w_el_old + (spatMean_w_el-spatMean_w_el_old)/n;
-            probeStruct->quantitiesArrayH[(arrOff+3)*nPoints+tProbe] = spatMean_u1_old + (spatMean_u1-spatMean_u1_old)/n;
-            probeStruct->quantitiesArrayH[(arrOff+4)*nPoints+tProbe] = spatMean_v1_old + (spatMean_v1-spatMean_v1_old)/n;
-            probeStruct->quantitiesArrayH[(arrOff+5)*nPoints+tProbe] = spatMean_w1_old + (spatMean_w1-spatMean_w1_old)/n;
-            probeStruct->quantitiesArrayH[(arrOff+6)*nPoints+tProbe] = spatMean_u_star_old +(spatMean_u_star-spatMean_u_star_old)/n;
-            probeStruct->quantitiesArrayH[(arrOff+7)*nPoints+tProbe] = spatMean_Fx_old + (spatMean_Fx-spatMean_Fx_old)/n;
-            probeStruct->quantitiesArrayH[(arrOff+8)*nPoints+tProbe] = spatMean_Fy_old + (spatMean_Fy-spatMean_Fy_old)/n;
-            probeStruct->quantitiesArrayH[(arrOff+9)*nPoints+tProbe] = spatMean_Fz_old + (spatMean_Fz-spatMean_Fz_old)/n;
+            temporal_average(probeStruct->quantitiesArrayH, spatMean_u_el  , timestep, probeStruct->nTimesteps, oldTimestep, arrOff+0, inv_n);
+            temporal_average(probeStruct->quantitiesArrayH, spatMean_v_el  , timestep, probeStruct->nTimesteps, oldTimestep, arrOff+1, inv_n);
+            temporal_average(probeStruct->quantitiesArrayH, spatMean_w_el  , timestep, probeStruct->nTimesteps, oldTimestep, arrOff+2, inv_n);
+            temporal_average(probeStruct->quantitiesArrayH, spatMean_u1    , timestep, probeStruct->nTimesteps, oldTimestep, arrOff+3, inv_n);
+            temporal_average(probeStruct->quantitiesArrayH, spatMean_v1    , timestep, probeStruct->nTimesteps, oldTimestep, arrOff+4, inv_n);
+            temporal_average(probeStruct->quantitiesArrayH, spatMean_w1    , timestep, probeStruct->nTimesteps, oldTimestep, arrOff+5, inv_n);
+            temporal_average(probeStruct->quantitiesArrayH, spatMean_u_star, timestep, probeStruct->nTimesteps, oldTimestep, arrOff+6, inv_n);
+            temporal_average(probeStruct->quantitiesArrayH, spatMean_Fx    , timestep, probeStruct->nTimesteps, oldTimestep, arrOff+7, inv_n);
+            temporal_average(probeStruct->quantitiesArrayH, spatMean_Fy    , timestep, probeStruct->nTimesteps, oldTimestep, arrOff+8, inv_n);
+            temporal_average(probeStruct->quantitiesArrayH, spatMean_Fz    , timestep, probeStruct->nTimesteps, oldTimestep, arrOff+9, inv_n);
 
             if(this->evaluatePressureGradient)
             {
-            real spatMean_dpdx_old     = probeStruct->quantitiesArrayH[(arrOff+10)*nPoints+tProbe-1];
-            real spatMean_dpdy_old     = probeStruct->quantitiesArrayH[(arrOff+11)*nPoints+tProbe-1];
-            real spatMean_dpdz_old     = probeStruct->quantitiesArrayH[(arrOff+12)*nPoints+tProbe-1];
-            probeStruct->quantitiesArrayH[(arrOff+10)*nPoints+tProbe] = spatMean_dpdx_old + (spatMean_dpdx-spatMean_dpdx_old)/n;
-            probeStruct->quantitiesArrayH[(arrOff+11)*nPoints+tProbe] = spatMean_dpdy_old + (spatMean_dpdy-spatMean_dpdy_old)/n;
-            probeStruct->quantitiesArrayH[(arrOff+12)*nPoints+tProbe] = spatMean_dpdz_old + (spatMean_dpdz-spatMean_dpdz_old)/n;
+                temporal_average(probeStruct->quantitiesArrayH, spatMean_dpdx, timestep, probeStruct->nTimesteps, oldTimestep, arrOff+10, inv_n);
+                temporal_average(probeStruct->quantitiesArrayH, spatMean_dpdy, timestep, probeStruct->nTimesteps, oldTimestep, arrOff+11, inv_n);
+                temporal_average(probeStruct->quantitiesArrayH, spatMean_dpdz, timestep, probeStruct->nTimesteps, oldTimestep, arrOff+12, inv_n);
             }
         }    
     }
-        
-
-    this->tProbe += 1;
     getLastCudaError("WallModelProbe::calculateQuantities execution failed");
 }
 
+uint WallModelProbe::getNumberOfTimestepsInTimeseries(Parameter* para, int level)
+{
+    return this->tOut*exp2(level)/this->tAvg+1; 
+}
+
diff --git a/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/WallModelProbe.h b/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/WallModelProbe.h
index 4ea90f74c7a0d57af4995e1b5874234967f1e901..c912656d2dcbb9551b8a156f7fe9818de0f315a4 100644
--- a/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/WallModelProbe.h
+++ b/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/WallModelProbe.h
@@ -81,6 +81,8 @@ private:
                     std::vector<real>& pointCoordsX_level, std::vector<real>& pointCoordsY_level, std::vector<real>& pointCoordsZ_level,
                     int level) override;
     void calculateQuantities(SPtr<ProbeStruct> probeStruct, Parameter* para, uint t, int level) override;
+    void getTaggedFluidNodes(Parameter *para, GridProvider* gridProvider) override {};
+    uint getNumberOfTimestepsInTimeseries(Parameter* para, int level) override;
 
 private:
     bool outputStress = false; //!> if true, output wall force is converted to a stress