diff --git a/CMake/3rd/vtk.cmake b/CMake/3rd/vtk.cmake
index 83cf22e8849298d2b42909664cacd5fd9044903e..7bba91a5a25da622ef752d670845754a6349815c 100644
--- a/CMake/3rd/vtk.cmake
+++ b/CMake/3rd/vtk.cmake
@@ -3,9 +3,8 @@
 # VTK_DIR needs to bet set to the VTK build directory in the config file.
 #########################################################################
 find_package(VTK REQUIRED)
-    vf_get_library_name(library_name)
+vf_get_library_name(library_name)
 
-    include(${VTK_USE_FILE})
-    target_include_directories(${library_name} PRIVATE ${VTK_INCLUDE_DIRS})
+target_include_directories(${library_name} PRIVATE ${VTK_INCLUDE_DIRS})
 
-    target_link_libraries(${library_name} PRIVATE ${VTK_LIBRARIES})
+target_link_libraries(${library_name} PRIVATE ${VTK_LIBRARIES})
diff --git a/CMakeLists.txt b/CMakeLists.txt
index dcc76b410d2b45f61a4b9d85ae54605f658def5b..ba35020d9d893637d82d968b68116ff6a922e99a 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -50,7 +50,6 @@ option(BUILD_USE_BOOST "Build VirtualFluids with boost" OFF)
 option(BUILD_USE_MPI "include MPI library support" ON)
 
 # vf gpu
-option(BUILD_VF_GPU          "Build VirtualFluids GPU"     ON )
 option(BUILD_VF_GKS          "Build VirtualFluids GKS"     OFF )
 option(BUILD_VF_TRAFFIC      "Build VirtualFluids Traffic" OFF)
 option(BUILD_JSONCPP         "Builds json cpp "            OFF)
@@ -98,7 +97,7 @@ IF( BUILD_VF_DOUBLE_ACCURACY )
     list(APPEND VF_COMPILER_DEFINITION VF_DOUBLE_ACCURACY)
 ENDIF()
 
-if(BUILD_VF_GPU)
+if(BUILD_VF_GPU OR BUILD_VF_GKS)
     include(CheckLanguage)
     check_language(CUDA)
 
@@ -196,7 +195,7 @@ add_subdirectory(src/lbm)
 if (BUILD_VF_CPU)
     include (cpu.cmake)
 endif()
-if(BUILD_VF_GPU)
+if(BUILD_VF_GPU OR BUILD_VF_GKS)
     add_subdirectory(src/cuda)
     include (gpu.cmake)
 endif()
diff --git a/apps/gpu/GKS/Flame7cm/CMakeLists.txt b/apps/gpu/GKS/Flame7cm/CMakeLists.txt
index c3f57dcdd8c9ea9e8ba891d72c039f5acdfc9cf9..75ca5fa4b4e9c51724d32a3733559b9489ce7943 100644
--- a/apps/gpu/GKS/Flame7cm/CMakeLists.txt
+++ b/apps/gpu/GKS/Flame7cm/CMakeLists.txt
@@ -1,6 +1,5 @@
-PROJECT(Flame7cm)
+PROJECT(Flame7cm LANGUAGES CUDA CXX)
 
-vf_add_library(BUILDTYPE binary PRIVATE_LINK basics GridGenerator GksMeshAdapter GksVtkAdapter GksGpu FILES Flame7cm.cpp )
+vf_add_library(BUILDTYPE binary PRIVATE_LINK basics GridGenerator GksMeshAdapter GksVtkAdapter GksGpu MPI::MPI_CXX FILES Flame7cm.cpp )
 
-include (${VF_CMAKE_DIR}/3rd/cuda.cmake)
-include (${VF_CMAKE_DIR}/3rd/mpi.cmake)
+set_source_files_properties(Flame7cm.cpp PROPERTIES LANGUAGE CUDA)
diff --git a/apps/gpu/GKS/Flame7cm/Flame7cm.cpp b/apps/gpu/GKS/Flame7cm/Flame7cm.cpp
index e0b736dfe5fb1be1a038bfb4ea6ad88b570f5951..4323ce5ae3bf8486a2203adec470e0d1fdc05a70 100644
--- a/apps/gpu/GKS/Flame7cm/Flame7cm.cpp
+++ b/apps/gpu/GKS/Flame7cm/Flame7cm.cpp
@@ -152,11 +152,10 @@ void thermalCavity( std::string path, std::string simulationName, uint _gpuIndex
 
     ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 
-    auto gridFactory = GridFactory::make();
-    gridFactory->setGridStrategy(Device::CPU);
-    gridFactory->setTriangularMeshDiscretizationMethod(TriangularMeshDiscretizationMethod::POINT_IN_OBJECT);
+    // auto gridFactory = GridFactory::make();
+    // gridFactory->setTriangularMeshDiscretizationMethod(TriangularMeshDiscretizationMethod::POINT_IN_OBJECT);
 
-    auto gridBuilder = MultipleGridBuilder::makeShared(gridFactory);
+    auto gridBuilder = MultipleGridBuilder::makeShared();
 
     ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 
@@ -476,14 +475,14 @@ int main( int argc, char* argv[])
     {
         thermalCavity( path, simulationName, gpuIndex, nx, useTempLimiter, restartIter );
     }
-    catch (const std::exception& e)
-    {     
-        *logging::out << logging::Logger::LOGGER_ERROR << e.what() << "\n";
-    }
     catch (const std::bad_alloc& e)
     {  
         *logging::out << logging::Logger::LOGGER_ERROR << "Bad Alloc:" << e.what() << "\n";
     }
+    catch (const std::exception& e)
+    {     
+        *logging::out << logging::Logger::LOGGER_ERROR << e.what() << "\n";
+    }
     catch (...)
     {
         *logging::out << logging::Logger::LOGGER_ERROR << "Unknown exception!\n";
diff --git a/apps/gpu/LBM/ActuatorLine/ActuatorLine.cpp b/apps/gpu/LBM/ActuatorLine/ActuatorLine.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..bc4a428ec890f5ba04fbd07333ac782b3681873f
--- /dev/null
+++ b/apps/gpu/LBM/ActuatorLine/ActuatorLine.cpp
@@ -0,0 +1,276 @@
+
+#define _USE_MATH_DEFINES
+#include <math.h>
+#include <string>
+#include <sstream>
+#include <iostream>
+#include <stdexcept>
+#include <fstream>
+#include <exception>
+#include <memory>
+
+#include "mpi.h"
+
+//////////////////////////////////////////////////////////////////////////
+
+#include "Core/DataTypes.h"
+#include "PointerDefinitions.h"
+
+#include "Core/LbmOrGks.h"
+#include "Core/StringUtilities/StringUtil.h"
+
+#include "Core/VectorTypes.h"
+#include "Core/Logger/Logger.h"
+
+#include <basics/config/ConfigurationFile.h>
+
+#include <logger/Logger.h>
+
+
+//////////////////////////////////////////////////////////////////////////
+
+#include "GridGenerator/grid/GridBuilder/LevelGridBuilder.h"
+#include "GridGenerator/grid/GridBuilder/MultipleGridBuilder.h"
+#include "GridGenerator/grid/BoundaryConditions/Side.h"
+#include "GridGenerator/grid/GridFactory.h"
+
+#include "GridGenerator/io/SimulationFileWriter/SimulationFileWriter.h"
+#include "GridGenerator/io/GridVTKWriter/GridVTKWriter.h"
+#include "GridGenerator/io/STLReaderWriter/STLReader.h"
+#include "GridGenerator/io/STLReaderWriter/STLWriter.h"
+
+//////////////////////////////////////////////////////////////////////////
+
+#include "VirtualFluids_GPU/LBM/Simulation.h"
+#include "VirtualFluids_GPU/Communication/Communicator.h"
+#include "VirtualFluids_GPU/DataStructureInitializer/GridReaderGenerator/GridGenerator.h"
+#include "VirtualFluids_GPU/DataStructureInitializer/GridProvider.h"
+#include "VirtualFluids_GPU/DataStructureInitializer/GridReaderFiles/GridReader.h"
+#include "VirtualFluids_GPU/Parameter/Parameter.h"
+#include "VirtualFluids_GPU/Output/FileWriter.h"
+#include "VirtualFluids_GPU/PreCollisionInteractor/ActuatorLine.h"
+#include "VirtualFluids_GPU/PreCollisionInteractor/Probes/PointProbe.h"
+#include "VirtualFluids_GPU/PreCollisionInteractor/Probes/PlaneProbe.h"
+
+#include "VirtualFluids_GPU/Kernel/Utilities/KernelFactory/KernelFactoryImp.h"
+#include "VirtualFluids_GPU/PreProcessor/PreProcessorFactory/PreProcessorFactoryImp.h"
+
+#include "VirtualFluids_GPU/GPU/CudaMemoryManager.h"
+
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//
+//          U s e r    s e t t i n g s
+//
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+LbmOrGks lbmOrGks = LBM;
+
+const real reference_diameter = 126.0; // diameter in m
+
+const real L_x = 10*reference_diameter;
+const real L_y = 6*reference_diameter;
+const real L_z = 6*reference_diameter;
+
+const real viscosity = 1.56e-5;
+
+const real velocity  = 9.0;
+
+const real mach = 0.1;
+
+const uint nodes_per_diameter = 16;
+
+std::string path(".");
+
+std::string simulationName("ActuatorLine");
+
+const uint timeStepOut = 500;
+const float tEnd = 280; // total time of simulation in s
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+void multipleLevel(const std::string& configPath)
+{
+
+    logging::Logger::addStream(&std::cout);
+    logging::Logger::setDebugLevel(logging::Logger::Level::INFO_LOW);
+    logging::Logger::timeStamp(logging::Logger::ENABLE);
+    logging::Logger::enablePrintedRankNumbers(logging::Logger::ENABLE);
+
+    auto gridBuilder = MultipleGridBuilder::makeShared();
+
+    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+	const real dx = reference_diameter/real(nodes_per_diameter);
+
+	gridBuilder->addCoarseGrid(0.0, 0.0, 0.0,
+							   L_x,  L_y,  L_z, dx);
+
+	gridBuilder->setPeriodicBoundaryCondition(false, false, false);
+
+	gridBuilder->buildGrids(lbmOrGks, false); // buildGrids() has to be called before setting the BCs!!!!
+
+	////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+	////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+	////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+	////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+	////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+	////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+	////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+	////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+    if( lbmOrGks == LBM )
+    {
+        vf::gpu::Communicator* comm = vf::gpu::Communicator::getInstanz();
+
+        vf::basics::ConfigurationFile config;
+        config.load(configPath);
+        ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+        SPtr<Parameter> para = std::make_shared<Parameter>(config, comm->getNummberOfProcess(), comm->getPID());
+
+        ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+        const real dt = dx * mach / (sqrt(3) * velocity);
+
+        const real velocityLB = velocity * dt / dx; // LB units
+
+        const real viscosityLB = viscosity * dt / (dx * dx); // LB units
+
+        VF_LOG_INFO("velocity  [dx/dt] = {}", velocityLB);
+        VF_LOG_INFO("viscosity [10^8 dx^2/dt] = {}", viscosityLB*1e8);
+
+        ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+		para->setDevices(std::vector<uint>{(uint)0});
+
+        para->setOutputPrefix( simulationName );
+
+        para->setFName(para->getOutputPath() + "/" + para->getOutputPrefix());
+
+        para->setPrintFiles(true);
+
+        para->setMaxLevel(1);
+
+        para->setVelocity(velocityLB);
+        para->setViscosity(viscosityLB);
+
+        para->setVelocityRatio( dx / dt );
+
+		para->setMainKernel("CumulantK17CompChim");
+
+		para->setInitialCondition([&](real coordX, real coordY, real coordZ, real &rho, real &vx, real &vy, real &vz) {
+            rho = (real)0.0;
+            vx  = velocityLB;
+            vy  = (real)0.0;
+            vz  = (real)0.0;
+        });
+
+        para->setTOut( timeStepOut );
+        para->setTEnd( uint(tEnd/dt) );
+
+        para->setIsBodyForce( true );
+
+
+        /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+        gridBuilder->setVelocityBoundaryCondition(SideType::MX,  velocityLB,  0.0, 0.0);
+        gridBuilder->setVelocityBoundaryCondition(SideType::PX,  velocityLB,  0.0, 0.0);
+        gridBuilder->setVelocityBoundaryCondition(SideType::MY,  velocityLB,  0.0, 0.0);
+        gridBuilder->setVelocityBoundaryCondition(SideType::PY,  velocityLB,  0.0, 0.0);
+        gridBuilder->setVelocityBoundaryCondition(SideType::MZ,  velocityLB,  0.0, 0.0);
+        gridBuilder->setVelocityBoundaryCondition(SideType::PZ,  velocityLB,  0.0, 0.0);
+
+        ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+        SPtr<CudaMemoryManager> cudaMemoryManager = CudaMemoryManager::make(para);
+
+        SPtr<GridProvider> gridGenerator = GridProvider::makeGridGenerator(gridBuilder, para, cudaMemoryManager);
+
+        real turbPos[3] = {3*reference_diameter, 3*reference_diameter, 3*reference_diameter};
+        real epsilon = 5.f; // width of gaussian smearing
+        real density = 1.225f;
+        int level = 0;
+        uint nBlades = 3;
+        uint nBladeNodes = 32;
+
+
+        SPtr<ActuatorLine> actuator_line =SPtr<ActuatorLine>( new ActuatorLine(nBlades, density, nBladeNodes, epsilon, turbPos[0], turbPos[1], turbPos[2], reference_diameter, level, dt, dx) );
+        para->addActuator( actuator_line );
+
+        SPtr<PointProbe> pointProbe = SPtr<PointProbe>( new PointProbe("pointProbe", 100, 500, 100) );
+        std::vector<real> probeCoordsX = {reference_diameter,2*reference_diameter,5*reference_diameter};
+        std::vector<real> probeCoordsY = {3*reference_diameter,3*reference_diameter,3*reference_diameter};
+        std::vector<real> probeCoordsZ = {3*reference_diameter,3*reference_diameter,3*reference_diameter};
+        pointProbe->addProbePointsFromList(probeCoordsX, probeCoordsY, probeCoordsZ);
+        // pointProbe->addProbePointsFromXNormalPlane(2*D, 0.0, 0.0, L_y, L_z, (uint)L_y/dx, (uint)L_z/dx);
+        pointProbe->addPostProcessingVariable(PostProcessingVariable::Means);
+        pointProbe->addPostProcessingVariable(PostProcessingVariable::Variances);
+        para->addProbe( pointProbe );
+
+        SPtr<PlaneProbe> planeProbe = SPtr<PlaneProbe>( new PlaneProbe("planeProbe", 100, 500, 100) );
+        planeProbe->setProbePlane(5*reference_diameter, 0, 0, dx, L_y, L_z);
+        planeProbe->addPostProcessingVariable(PostProcessingVariable::Means);
+        para->addProbe( planeProbe );
+
+
+
+
+        Simulation sim;
+        SPtr<FileWriter> fileWriter = SPtr<FileWriter>(new FileWriter());
+        SPtr<KernelFactoryImp> kernelFactory = KernelFactoryImp::getInstance();
+        SPtr<PreProcessorFactoryImp> preProcessorFactory = PreProcessorFactoryImp::getInstance();
+        sim.setFactories(kernelFactory, preProcessorFactory);
+        sim.init(para, gridGenerator, fileWriter, cudaMemoryManager);        
+        sim.run();
+        sim.free();
+
+        ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+    }
+}
+
+int main( int argc, char* argv[])
+{
+    MPI_Init(&argc, &argv);
+    std::string str, str2; 
+    if ( argv != NULL )
+    {
+        //str = static_cast<std::string>(argv[0]);
+        
+        try
+        {
+            //////////////////////////////////////////////////////////////////////////
+
+            vf::logging::Logger::initalizeLogger();
+
+            if( argc > 1){ path = argv[1]; }
+
+			multipleLevel(path + "/configActuatorLine.txt");
+
+            //////////////////////////////////////////////////////////////////////////
+		}
+        catch (const spdlog::spdlog_ex &ex) {
+            std::cout << "Log initialization failed: " << ex.what() << std::endl;
+        }
+
+        catch (const std::bad_alloc& e)
+        { 
+            VF_LOG_CRITICAL("Bad Alloc: {}", e.what());
+        }
+        catch (const std::exception& e)
+        {   
+            VF_LOG_CRITICAL("exception: {}", e.what());
+        }
+        catch (...)
+        {
+            VF_LOG_CRITICAL("Unknown exception!");
+        }
+    }
+
+    MPI_Finalize();
+    return 0;
+}
diff --git a/apps/gpu/LBM/ActuatorLine/CMakeLists.txt b/apps/gpu/LBM/ActuatorLine/CMakeLists.txt
new file mode 100644
index 0000000000000000000000000000000000000000..e0ff4e06e83a957be6966a7322ff06a0d068d18a
--- /dev/null
+++ b/apps/gpu/LBM/ActuatorLine/CMakeLists.txt
@@ -0,0 +1,7 @@
+PROJECT(ActuatorLine LANGUAGES CUDA CXX)
+
+vf_add_library(BUILDTYPE binary PRIVATE_LINK basics VirtualFluids_GPU GridGenerator MPI::MPI_CXX FILES ActuatorLine.cpp)
+
+set_source_files_properties(ActuatorLine.cpp PROPERTIES LANGUAGE CUDA)
+
+set_target_properties(ActuatorLine PROPERTIES CUDA_SEPARABLE_COMPILATION ON)
diff --git a/apps/gpu/LBM/ActuatorLine/configActuatorLine.txt b/apps/gpu/LBM/ActuatorLine/configActuatorLine.txt
new file mode 100644
index 0000000000000000000000000000000000000000..3b590b29f71f7e965828f3821608e0013fb281b1
--- /dev/null
+++ b/apps/gpu/LBM/ActuatorLine/configActuatorLine.txt
@@ -0,0 +1,8 @@
+##################################################
+#informations for Writing
+##################################################
+Path = "."
+##################################################
+#informations for reading
+##################################################
+GridPath="."
diff --git a/apps/gpu/LBM/DrivenCavity/DrivenCavity.cpp b/apps/gpu/LBM/DrivenCavity/DrivenCavity.cpp
index ded3d2f3315d651c94add505e142ee585063d13a..fd9884549813f89542b7e91768073b951bb1691c 100644
--- a/apps/gpu/LBM/DrivenCavity/DrivenCavity.cpp
+++ b/apps/gpu/LBM/DrivenCavity/DrivenCavity.cpp
@@ -99,6 +99,10 @@ const real dt = (real)1.0e-3; //0.5e-3;
 
 const uint nx = 64;
 
+//std::string path("F:/Work/Computations/out/DrivenCavity/"); //LEGOLAS
+//std::string path("D:/out/DrivenCavity"); //Mollok
+std::string path(".");
+
 std::string simulationName("DrivenCavityChim");
 
 const uint timeStepOut = 10000;
diff --git a/gpu.cmake b/gpu.cmake
index 47b820dfa561b2e1bfbd0e824c0e6586f9a7a3f1..ead5e26bca299819513e4f7639d5431d8973c927 100644
--- a/gpu.cmake
+++ b/gpu.cmake
@@ -1,5 +1,3 @@
-
-
 if(BUILD_NUMERIC_TESTS)
     set(CMAKE_CXX_STANDARD 17)
 endif()
@@ -44,6 +42,7 @@ IF (BUILD_VF_GPU)
     #add_subdirectory(apps/gpu/LBM/gridGeneratorTest)
     #add_subdirectory(apps/gpu/LBM/TGV_3D)
     #add_subdirectory(apps/gpu/LBM/TGV_3D_MultiGPU)
+    add_subdirectory(apps/gpu/LBM/ActuatorLine)
 ELSE()
     MESSAGE( STATUS "exclude Virtual Fluids GPU." )
 ENDIF()
@@ -137,4 +136,4 @@ endif()
 if(BUILD_VF_TRAFFIC)
     add_subdirectory(src/gpu/Traffic)
     add_subdirectory(apps/gpu/LBM/TrafficTest)
-endif()
\ No newline at end of file
+endif()
diff --git a/src/gpu/GksGpu/Analyzer/EnstrophyAnalyzer.cu b/src/gpu/GksGpu/Analyzer/EnstrophyAnalyzer.cu
index 5a3dc76db1440fc641bd7c58cdce48b7996d63ee..346692bfdf8c8daf9a659a3a0ef04aa57f487545 100644
--- a/src/gpu/GksGpu/Analyzer/EnstrophyAnalyzer.cu
+++ b/src/gpu/GksGpu/Analyzer/EnstrophyAnalyzer.cu
@@ -59,6 +59,8 @@ bool EnstrophyAnalyzer::run(uint iter)
     this->enstrophyTimeSeries.push_back( EnstrophyTmp );
 
     //*logging::out << logging::Logger::INFO_HIGH << "EKin = " << EKin << "\n";
+
+    return true;
 }
 
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
diff --git a/src/gpu/GksGpu/Analyzer/HeatFluxAnalyzer.cu b/src/gpu/GksGpu/Analyzer/HeatFluxAnalyzer.cu
index 266d3b564d5a413f68fa27130b6a9f02eac54511..ed68f8d95a2a68c00ab53c2cd1037bbff43e0f5b 100644
--- a/src/gpu/GksGpu/Analyzer/HeatFluxAnalyzer.cu
+++ b/src/gpu/GksGpu/Analyzer/HeatFluxAnalyzer.cu
@@ -63,6 +63,8 @@ bool HeatFluxAnalyzer::run(uint iter, Parameters parameters)
     this->heatFluxTimeSeries.push_back( q / qIdeal );
 
     if( iter % this->outputIter == 0 ) *logging::out << logging::Logger::INFO_HIGH << "q = " << q / qIdeal << "\n";
+
+    return true;
 }
 
 //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
diff --git a/src/gpu/GksGpu/Analyzer/TurbulenceAnalyzer.cu b/src/gpu/GksGpu/Analyzer/TurbulenceAnalyzer.cu
index d05c47cd2309c6b2804f04dbd0c2214d3be388a3..5e896e03e7f02b63759f4ff6d42ca7f7f5e7bfa5 100644
--- a/src/gpu/GksGpu/Analyzer/TurbulenceAnalyzer.cu
+++ b/src/gpu/GksGpu/Analyzer/TurbulenceAnalyzer.cu
@@ -51,6 +51,8 @@ bool TurbulenceAnalyzer::run(uint iter, Parameters parameters)
     getLastCudaError("TurbulenceAnalyzer::run(uint iter, Parameters parameters)");
 
     this->counter++;
+
+    return true;
 }
 
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
diff --git a/src/gpu/GksGpu/BoundaryConditions/CreepingMassFlux.cu b/src/gpu/GksGpu/BoundaryConditions/CreepingMassFlux.cu
index 2b8b8174fd96d335a82cd80586415fc598576678..fd55918246bd4aebb38f1ed8982d86e776eb7fbb 100644
--- a/src/gpu/GksGpu/BoundaryConditions/CreepingMassFlux.cu
+++ b/src/gpu/GksGpu/BoundaryConditions/CreepingMassFlux.cu
@@ -91,7 +91,7 @@ __host__ __device__ inline void boundaryConditionFunction(const DataBaseStruct&
                                                           const uint startIndex,
                                                           const uint index)
 {
-    uint ghostCellIdx  = boundaryCondition.ghostCells [ startIndex + index ];
+    // uint ghostCellIdx  = boundaryCondition.ghostCells [ startIndex + index ];
     uint domainCellIdx = boundaryCondition.domainCells[ startIndex + index ];
 
     ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
diff --git a/src/gpu/GksGpu/BoundaryConditions/HeatFlux.cu b/src/gpu/GksGpu/BoundaryConditions/HeatFlux.cu
index 3ecd1b6cd52c2210b0e67c48937f1db1b9420f2a..87f880bcf1001a012487f5df327566dfe1ded350 100644
--- a/src/gpu/GksGpu/BoundaryConditions/HeatFlux.cu
+++ b/src/gpu/GksGpu/BoundaryConditions/HeatFlux.cu
@@ -91,7 +91,7 @@ __host__ __device__ inline void boundaryConditionFunction(const DataBaseStruct&
                                                           const uint startIndex,
                                                           const uint index)
 {
-    uint ghostCellIdx  = boundaryCondition.ghostCells [ startIndex + index ];
+    // uint ghostCellIdx  = boundaryCondition.ghostCells [ startIndex + index ];
     uint domainCellIdx = boundaryCondition.domainCells[ startIndex + index ];
 
     ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
diff --git a/src/gpu/GksGpu/BoundaryConditions/Inflow.cu b/src/gpu/GksGpu/BoundaryConditions/Inflow.cu
index 21ab9829309e072fd5049026bfb74d79c4393acb..7f9b2777f5e75a5c79a2ee5f280871a021cf6c94 100644
--- a/src/gpu/GksGpu/BoundaryConditions/Inflow.cu
+++ b/src/gpu/GksGpu/BoundaryConditions/Inflow.cu
@@ -86,7 +86,7 @@ __host__ __device__ inline void boundaryConditionFunction(const DataBaseStruct&
 {
     uint ghostCellIdx  = boundaryCondition.ghostCells [ startIndex + index ];
     uint domainCellIdx = boundaryCondition.domainCells[ startIndex + index ];
-    uint secondCellIdx = boundaryCondition.secondCells[ startIndex + index ];
+    // uint secondCellIdx = boundaryCondition.secondCells[ startIndex + index ];
 
     PrimitiveVariables ghostCellPrim;
     {
diff --git a/src/gpu/GksGpu/BoundaryConditions/MassCompensation.cu b/src/gpu/GksGpu/BoundaryConditions/MassCompensation.cu
index 4aaf406348754db851e1c45be542f158d7621b36..f6e69742635d594b2f0f1319642c51a5dde78a9e 100644
--- a/src/gpu/GksGpu/BoundaryConditions/MassCompensation.cu
+++ b/src/gpu/GksGpu/BoundaryConditions/MassCompensation.cu
@@ -91,7 +91,7 @@ __host__ __device__ inline void boundaryConditionFunction(const DataBaseStruct&
                                                           const uint startIndex,
                                                           const uint index)
 {
-    uint ghostCellIdx  = boundaryCondition.ghostCells [ startIndex + index ];
+    // uint ghostCellIdx  = boundaryCondition.ghostCells [ startIndex + index ];
     uint domainCellIdx = boundaryCondition.domainCells[ startIndex + index ];
 
     ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
diff --git a/src/gpu/GksGpu/CMakeLists.txt b/src/gpu/GksGpu/CMakeLists.txt
index da404e0209ed2c9f36ae323d2e6bd234fb6dfb96..5dbc533cc5f45c006c29a12242350f0433518bbf 100644
--- a/src/gpu/GksGpu/CMakeLists.txt
+++ b/src/gpu/GksGpu/CMakeLists.txt
@@ -1,3 +1,10 @@
 project(GksGpu LANGUAGES CUDA CXX)
 
-vf_add_library(PRIVATE_LINK basics GksMeshAdapter OpenMP::OpenMP_CXX MPI::MPI_CXX)
+vf_add_library(PRIVATE_LINK basics lbmCuda GksMeshAdapter OpenMP::OpenMP_CXX MPI::MPI_CXX)
+
+target_include_directories(GksGpu PRIVATE "${VF_THIRD_DIR}/cuda_samples/")
+
+if (NOT MSVC)
+    target_compile_options(GksGpu PRIVATE "$<$<COMPILE_LANGUAGE:CXX>:-fPIC>")
+endif()
+
diff --git a/src/gpu/GksGpu/CellProperties/CellProperties.cuh b/src/gpu/GksGpu/CellProperties/CellProperties.cuh
index 1ce36e85baa70739d9a98abf6cb9d3b9bd1a25f8..08731b9f52cdc54cc41d5e239ac05ee6e88fecd7 100644
--- a/src/gpu/GksGpu/CellProperties/CellProperties.cuh
+++ b/src/gpu/GksGpu/CellProperties/CellProperties.cuh
@@ -4,9 +4,13 @@
 #ifdef __CUDACC__
 #include <cuda_runtime.h>
 #else
+#ifndef __host__
 #define __host__
+#endif
+#ifndef __device__
 #define __device__
 #endif
+#endif
 
 //////////////////////////////////////////////////////////////////////////
 
diff --git a/src/gpu/GksGpu/CellUpdate/Reaction.cuh b/src/gpu/GksGpu/CellUpdate/Reaction.cuh
index 4bf317b2704d0111079fcb9888f62026265e6fd0..21ba61220fd7b81fbb53002ea090d278d228bb66 100644
--- a/src/gpu/GksGpu/CellUpdate/Reaction.cuh
+++ b/src/gpu/GksGpu/CellUpdate/Reaction.cuh
@@ -36,14 +36,14 @@ inline __host__ __device__ real getTurbulentViscosityDeardorff(const DataBaseStr
         real uHead = c1o2 * prim.U;
 
         {
-            uint neighborCellIndex = dataBase.cellToCell[CELL_TO_CELL(cellIndex, 0, dataBase.numberOfCells)];
+            // uint neighborCellIndex = dataBase.cellToCell[CELL_TO_CELL(cellIndex, 0, dataBase.numberOfCells)];
             readCellData(cellIndex, dataBase, neighborCons);
             neighborPrim = toPrimitiveVariables(neighborCons, parameters.K);
 
             uHead += c1o4 * neighborPrim.U;
         }
         {
-            uint neighborCellIndex = dataBase.cellToCell[CELL_TO_CELL(cellIndex, 1, dataBase.numberOfCells)];
+            // uint neighborCellIndex = dataBase.cellToCell[CELL_TO_CELL(cellIndex, 1, dataBase.numberOfCells)];
             readCellData(cellIndex, dataBase, neighborCons);
             neighborPrim = toPrimitiveVariables(neighborCons, parameters.K);
 
@@ -57,14 +57,14 @@ inline __host__ __device__ real getTurbulentViscosityDeardorff(const DataBaseStr
         real vHead = c1o2 * prim.V;
 
         {
-            uint neighborCellIndex = dataBase.cellToCell[CELL_TO_CELL(cellIndex, 2, dataBase.numberOfCells)];
+            // uint neighborCellIndex = dataBase.cellToCell[CELL_TO_CELL(cellIndex, 2, dataBase.numberOfCells)];
             readCellData(cellIndex, dataBase, neighborCons);
             neighborPrim = toPrimitiveVariables(neighborCons, parameters.K);
 
             vHead += c1o4 * neighborPrim.V;
         }
         {
-            uint neighborCellIndex = dataBase.cellToCell[CELL_TO_CELL(cellIndex, 3, dataBase.numberOfCells)];
+            // uint neighborCellIndex = dataBase.cellToCell[CELL_TO_CELL(cellIndex, 3, dataBase.numberOfCells)];
             readCellData(cellIndex, dataBase, neighborCons);
             neighborPrim = toPrimitiveVariables(neighborCons, parameters.K);
 
@@ -78,14 +78,14 @@ inline __host__ __device__ real getTurbulentViscosityDeardorff(const DataBaseStr
         real wHead = c1o2 * prim.W;
 
         {
-            uint neighborCellIndex = dataBase.cellToCell[CELL_TO_CELL(cellIndex, 4, dataBase.numberOfCells)];
+            // uint neighborCellIndex = dataBase.cellToCell[CELL_TO_CELL(cellIndex, 4, dataBase.numberOfCells)];
             readCellData(cellIndex, dataBase, neighborCons);
             neighborPrim = toPrimitiveVariables(neighborCons, parameters.K);
 
             wHead += c1o4 * neighborPrim.W;
         }
         {
-            uint neighborCellIndex = dataBase.cellToCell[CELL_TO_CELL(cellIndex, 5, dataBase.numberOfCells)];
+            // uint neighborCellIndex = dataBase.cellToCell[CELL_TO_CELL(cellIndex, 5, dataBase.numberOfCells)];
             readCellData(cellIndex, dataBase, neighborCons);
             neighborPrim = toPrimitiveVariables(neighborCons, parameters.K);
 
diff --git a/src/gpu/GksGpu/DataBase/DataBase.cpp b/src/gpu/GksGpu/DataBase/DataBase.cpp
index 21c51b7575fbb46870b6b0397fe01d954e6458be..46921a683de3dd9c322be2d89b4ca66f6fa07020 100644
--- a/src/gpu/GksGpu/DataBase/DataBase.cpp
+++ b/src/gpu/GksGpu/DataBase/DataBase.cpp
@@ -14,6 +14,10 @@
 #include "GksMeshAdapter/GksMeshAdapter.h"
 #include "Communication/Communicator.h"
 
+#include <lbm/constants/NumericConstants.h>
+
+using namespace vf::lbm::constant;
+
 namespace GksGpu {
 
 DataBase::DataBase( std::string type ) 
diff --git a/src/gpu/GksGpu/FlowStateData/AccessDeviceData.cuh b/src/gpu/GksGpu/FlowStateData/AccessDeviceData.cuh
index 3ff9848f15d57ab66ed5c8c6178df3afd1d4c581..2ad158173970c5bb36637643f621c729a8fcc37a 100644
--- a/src/gpu/GksGpu/FlowStateData/AccessDeviceData.cuh
+++ b/src/gpu/GksGpu/FlowStateData/AccessDeviceData.cuh
@@ -4,9 +4,13 @@
 #ifdef __CUDACC__
 #include <cuda_runtime.h>
 #else
+#ifndef __host__
 #define __host__
+#endif
+#ifndef __device__
 #define __device__
 #endif
+#endif
 
 #include "Core/DataTypes.h"
 #include "Core/RealConstants.h"
diff --git a/src/gpu/GksGpu/FlowStateData/FlowStateData.cuh b/src/gpu/GksGpu/FlowStateData/FlowStateData.cuh
index 3ec3a7c9e60cf60d14fec43eaa1d17792148fb15..3b7929b39b47761624fec7052becc55921990276 100644
--- a/src/gpu/GksGpu/FlowStateData/FlowStateData.cuh
+++ b/src/gpu/GksGpu/FlowStateData/FlowStateData.cuh
@@ -4,15 +4,22 @@
 #ifdef __CUDACC__
 #include <cuda_runtime.h>
 #else
+#ifndef __host__
 #define __host__
+#endif
+#ifndef __device__
 #define __device__
 #endif
+#endif
 
 #include "Core/DataTypes.h"
-#include "Core/RealConstants.h"
 
 #include "Definitions/PassiveScalar.h"
 
+#include <lbm/constants/NumericConstants.h>
+
+using namespace vf::lbm::constant;
+
 namespace GksGpu {
 
 //////////////////////////////////////////////////////////////////////////
diff --git a/src/gpu/GksGpu/FlowStateData/FlowStateDataConversion.cuh b/src/gpu/GksGpu/FlowStateData/FlowStateDataConversion.cuh
index c33f02ea3f9f4d13050c72b52b471b312a52b1c8..b7b759c99ffec6118a4173af098e0b372caf6ef7 100644
--- a/src/gpu/GksGpu/FlowStateData/FlowStateDataConversion.cuh
+++ b/src/gpu/GksGpu/FlowStateData/FlowStateDataConversion.cuh
@@ -4,9 +4,13 @@
 #ifdef __CUDACC__
 #include <cuda_runtime.h>
 #else
+#ifndef __host__
 #define __host__
+#endif
+#ifndef __device__
 #define __device__
 #endif
+#endif
 
 #include "Core/DataTypes.h"
 #include "Core/RealConstants.h"
diff --git a/src/gpu/GksGpu/FlowStateData/HeatCapacities.cuh b/src/gpu/GksGpu/FlowStateData/HeatCapacities.cuh
index 3002ed3ddc275ce9dda7499430153b7460e2452f..04a164aa327bed36cca2b8756c87dd1c7d9f0a64 100644
--- a/src/gpu/GksGpu/FlowStateData/HeatCapacities.cuh
+++ b/src/gpu/GksGpu/FlowStateData/HeatCapacities.cuh
@@ -1,12 +1,16 @@
 //#ifndef HeatCapacities_H
 //#define HeatCapacities_H
 //
-//#ifdef __CUDACC__
-//#include <cuda_runtime.h>
-//#else
-//#define __host__
-//#define __device__
-//#endif
+// #ifdef __CUDACC__
+// #include <cuda_runtime.h>
+// #else
+// #ifndef __host__
+// #define __host__
+// #endif
+// #ifndef __device__
+// #define __device__
+// #endif
+// #endif
 //
 //#include "Core/DataTypes.h"
 //#include "Core/RealConstants.h"
diff --git a/src/gpu/GksGpu/FlowStateData/ThermalDependencies.cuh b/src/gpu/GksGpu/FlowStateData/ThermalDependencies.cuh
index 9f3a268a5d8a7e64d59b0c1b60eeec8d68423174..47eb261a089b9a1c8d7bb14bca864c334887d447 100644
--- a/src/gpu/GksGpu/FlowStateData/ThermalDependencies.cuh
+++ b/src/gpu/GksGpu/FlowStateData/ThermalDependencies.cuh
@@ -4,9 +4,13 @@
 #ifdef __CUDACC__
 #include <cuda_runtime.h>
 #else
+#ifndef __host__
 #define __host__
+#endif
+#ifndef __device__
 #define __device__
 #endif
+#endif
 
 #include <math.h>
 
diff --git a/src/gpu/GksGpu/FluxComputation/FluxComputation.cu b/src/gpu/GksGpu/FluxComputation/FluxComputation.cu
index 8c935863615eb1c3f117c87b6ba57e92a0061c0f..25ba5726bfd505518bf82b88accea1c3549c5b96 100644
--- a/src/gpu/GksGpu/FluxComputation/FluxComputation.cu
+++ b/src/gpu/GksGpu/FluxComputation/FluxComputation.cu
@@ -152,7 +152,7 @@ __host__ __device__ inline void fluxFunction(DataBaseStruct dataBase, Parameters
     {
         if( parameters.spongeLayerIdx == 0 )
         {
-            real x = dataBase.faceCenter[VEC_X(faceIndex, dataBase.numberOfFaces)];
+            // real x = dataBase.faceCenter[VEC_X(faceIndex, dataBase.numberOfFaces)];
             real z = dataBase.faceCenter[VEC_Z(faceIndex, dataBase.numberOfFaces)];
 
             real muNew = parameters.mu;
@@ -168,7 +168,7 @@ __host__ __device__ inline void fluxFunction(DataBaseStruct dataBase, Parameters
         }
         if( parameters.spongeLayerIdx == 1 )
         {
-            real x = dataBase.faceCenter[VEC_X(faceIndex, dataBase.numberOfFaces)];
+            // real x = dataBase.faceCenter[VEC_X(faceIndex, dataBase.numberOfFaces)];
             real z = dataBase.faceCenter[VEC_Z(faceIndex, dataBase.numberOfFaces)];
 
             real muNew = parameters.mu;
diff --git a/src/gpu/GksGpu/Output/VtkWriter.cpp b/src/gpu/GksGpu/Output/VtkWriter.cpp
index a1a0ab9f62f275107e790a34c407c83adc09ab2d..234151c7df481e81e5dd68c9a4692831f7271f54 100644
--- a/src/gpu/GksGpu/Output/VtkWriter.cpp
+++ b/src/gpu/GksGpu/Output/VtkWriter.cpp
@@ -47,6 +47,8 @@
 #include "FlowStateData/FlowStateDataConversion.cuh"
 #include "FlowStateData/AccessDeviceData.cuh"
 
+namespace GksGpu {
+
 void VtkWriter::write(std::shared_ptr<DataBase> dataBase, Parameters parameters, std::string filename)
 {
     *logging::out << logging::Logger::INFO_INTERMEDIATE << "Write " << filename << ".vtu" << " ... \n";
@@ -144,3 +146,5 @@ void VtkWriter::write(std::shared_ptr<DataBase> dataBase, Parameters parameters,
 
     *logging::out << logging::Logger::INFO_INTERMEDIATE << "done!\n";
 }
+
+}
diff --git a/src/gpu/GksGpu/Output/VtkWriter.h b/src/gpu/GksGpu/Output/VtkWriter.h
index 0596fc7bd164050236b8db54a31ab7689a84d01f..679fae55b2db5ec418b389ca0840961ab8f80dde 100644
--- a/src/gpu/GksGpu/Output/VtkWriter.h
+++ b/src/gpu/GksGpu/Output/VtkWriter.h
@@ -38,9 +38,12 @@
 
 #include "GksGpu_export.h"
 
+namespace GksGpu {
+
 struct DataBase;
 struct Parameters;
 
+
 class GKSGPU_EXPORT VtkWriter
 {
 public:
@@ -49,4 +52,6 @@ public:
                        std::string filename );
 };
 
+}
+
 #endif
\ No newline at end of file
diff --git a/src/gpu/GksMeshAdapter/CMakeLists.txt b/src/gpu/GksMeshAdapter/CMakeLists.txt
index cb00b3c016786c41ef5640eb362322bb0a3768f8..b9a2d12df4d0bee9396a706c6636b5f4056b2d3a 100644
--- a/src/gpu/GksMeshAdapter/CMakeLists.txt
+++ b/src/gpu/GksMeshAdapter/CMakeLists.txt
@@ -1,3 +1,3 @@
 project(GksMeshAdapter LANGUAGES CUDA CXX)
 
-vf_add_library(PRIVATE_LINK basics GridGenerator)
+vf_add_library(PRIVATE_LINK basics GridGenerator lbmCuda)
diff --git a/src/gpu/GksMeshAdapter/GksMeshAdapter.cpp b/src/gpu/GksMeshAdapter/GksMeshAdapter.cpp
index 16f5c208565ff090cd2344348d5e47150babe84e..8d032dfeead2f582c5af2426c45b09ead33883cc 100644
--- a/src/gpu/GksMeshAdapter/GksMeshAdapter.cpp
+++ b/src/gpu/GksMeshAdapter/GksMeshAdapter.cpp
@@ -22,6 +22,10 @@
 #include "MeshCell.h"
 #include "MeshFace.h"
 
+#include <lbm/constants/NumericConstants.h>
+
+using namespace vf::lbm::constant;
+
 using namespace vf::gpu;
 
 GksMeshAdapter::GksMeshAdapter(SPtr<MultipleGridBuilder> gridBuilder)
@@ -518,7 +522,7 @@ void GksMeshAdapter::sortFaces()
     // sort into blocks
     ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 
-    std::array<char, 3> orientations = {'x', 'y', 'z'};
+    // std::array<char, 3> orientations = {'x', 'y', 'z'};
 
     for( uint level = 0; level < this->gridBuilder->getNumberOfLevels(); level++ )
     {
@@ -527,17 +531,17 @@ void GksMeshAdapter::sortFaces()
             uint start =         this->startOfFacesPerLevelXYZ [ 3 * level + idx];
             uint end   = start + this->numberOfFacesPerLevelXYZ[ 3 * level + idx];
 
-            real xMax = (*std::max_element(this->faces.begin() + start, this->faces.begin() + end, [this](MeshFace lhs, MeshFace rhs) { return lhs.faceCenter.x < rhs.faceCenter.x; })).faceCenter.x;
-            real yMax = (*std::max_element(this->faces.begin() + start, this->faces.begin() + end, [this](MeshFace lhs, MeshFace rhs) { return lhs.faceCenter.y < rhs.faceCenter.y; })).faceCenter.y;
-            real zMax = (*std::max_element(this->faces.begin() + start, this->faces.begin() + end, [this](MeshFace lhs, MeshFace rhs) { return lhs.faceCenter.z < rhs.faceCenter.z; })).faceCenter.z;
+            // real xMax = (*std::max_element(this->faces.begin() + start, this->faces.begin() + end, [this](MeshFace lhs, MeshFace rhs) { return lhs.faceCenter.x < rhs.faceCenter.x; })).faceCenter.x;
+            // real yMax = (*std::max_element(this->faces.begin() + start, this->faces.begin() + end, [this](MeshFace lhs, MeshFace rhs) { return lhs.faceCenter.y < rhs.faceCenter.y; })).faceCenter.y;
+            // real zMax = (*std::max_element(this->faces.begin() + start, this->faces.begin() + end, [this](MeshFace lhs, MeshFace rhs) { return lhs.faceCenter.z < rhs.faceCenter.z; })).faceCenter.z;
 
             real xMin = (*std::min_element(this->faces.begin() + start, this->faces.begin() + end, [this](MeshFace lhs, MeshFace rhs) { return lhs.faceCenter.x < rhs.faceCenter.x; })).faceCenter.x;
             real yMin = (*std::min_element(this->faces.begin() + start, this->faces.begin() + end, [this](MeshFace lhs, MeshFace rhs) { return lhs.faceCenter.y < rhs.faceCenter.y; })).faceCenter.y;
             real zMin = (*std::min_element(this->faces.begin() + start, this->faces.begin() + end, [this](MeshFace lhs, MeshFace rhs) { return lhs.faceCenter.z < rhs.faceCenter.z; })).faceCenter.z;
 
-            real xRange = xMax - xMin;
-            real yRange = yMax - yMin;
-            real zRange = zMax - zMin;
+            // real xRange = xMax - xMin;
+            // real yRange = yMax - yMin;
+            // real zRange = zMax - zMin;
 
             uint blockDim = 8;
 
diff --git a/src/gpu/GksVtkAdapter/CMakeLists.txt b/src/gpu/GksVtkAdapter/CMakeLists.txt
index 644dc6defa101644338fc35211efa7eccabca3b0..fdc7a1eb56f548afc58e83ef7b0f7ad02ad12ea9 100644
--- a/src/gpu/GksVtkAdapter/CMakeLists.txt
+++ b/src/gpu/GksVtkAdapter/CMakeLists.txt
@@ -1,5 +1,5 @@
 
 
-vf_add_library(BUILDTYPE shared PRIVATE_LINK basics GksGpu)
+vf_add_library(BUILDTYPE static PRIVATE_LINK basics GksGpu)
 
 include (${VF_CMAKE_DIR}/3rd/vtk.cmake)
\ No newline at end of file
diff --git a/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.cpp b/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.cpp
index 71757d073fb30bc888173fa47adf7236e199f3a5..a061b83547c0ddf6bb67a4b48b8b6ee3503a8e58 100644
--- a/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.cpp
+++ b/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.cpp
@@ -8,6 +8,9 @@
 #include "Communication/ExchangeData27.h"
 #include "Kernel/Kernel.h"
 
+void interactWithActuators(Parameter* para, CudaMemoryManager* cudaManager, int level, unsigned int t);
+void interactWithProbes(Parameter* para, CudaMemoryManager* cudaManager, int level, unsigned int t);
+
 void updateGrid27(Parameter* para, 
                   vf::gpu::Communicator& comm, 
                   CudaMemoryManager* cudaManager, 
@@ -59,6 +62,10 @@ void updateGrid27(Parameter* para,
 
         coarseToFine(para, level);
     }
+
+    interactWithActuators(para, cudaManager, level, t);
+
+    interactWithProbes(para, cudaManager, level, t);
 }
 
 void collision(Parameter* para, std::vector<std::shared_ptr<PorousMedia>>& pm, int level, unsigned int t, std::vector < SPtr< Kernel>>& kernels)
@@ -1259,3 +1266,19 @@ void coarseToFine(Parameter* para, int level)
     } 
 
 }
+
+void interactWithActuators(Parameter* para, CudaMemoryManager* cudaManager, int level, unsigned int t)
+{
+    for( SPtr<PreCollisionInteractor> actuator: para->getActuators() )
+    {
+        actuator->interact(para, cudaManager, level, t);
+    }
+}
+
+void interactWithProbes(Parameter* para, CudaMemoryManager* cudaManager, int level, unsigned int t)
+{
+    for( SPtr<PreCollisionInteractor> probe: para->getProbes() )
+    {
+        probe->interact(para, cudaManager, level, t);
+    }
+}
\ No newline at end of file
diff --git a/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridProvider.cpp b/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridProvider.cpp
index 957935756163795945e9ac1c4762e0eb825239ee..f21ee67a1054f6395b84377cbb71c3b2ff9ceaec 100644
--- a/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridProvider.cpp
+++ b/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridProvider.cpp
@@ -81,6 +81,12 @@ void GridProvider::setInitalNodeValues(const int numberOfNodes, const int level)
             para->getParH(level)->gDyvz[j] = 0.0f;
             para->getParH(level)->gDzvz[j] = 0.0f;
         }
+
+        if (para->getIsBodyForce()) {
+            para->getParH(level)->forceX_SP[j] = 0.0f;
+            para->getParH(level)->forceY_SP[j] = 0.0f;
+            para->getParH(level)->forceZ_SP[j] = 0.0f;
+        }
     }
 
 
diff --git a/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderGenerator/GridGenerator.cpp b/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderGenerator/GridGenerator.cpp
index 3ce1093d1847743962f3b76cbe62445cbacef312..6edd22963d51404eaf683be77b74e1bba543881c 100644
--- a/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderGenerator/GridGenerator.cpp
+++ b/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderGenerator/GridGenerator.cpp
@@ -63,6 +63,9 @@ void GridGenerator::allocArrays_CoordNeighborGeo()
 
         if(para->getUseWale())
             cudaMemoryManager->cudaAllocTurbulentViscosity(level);
+        
+        if(para->getIsBodyForce())
+            cudaMemoryManager->cudaAllocBodyForce(level);
 
 		builder->getNodeValues(
 			para->getParH(level)->coordX_SP,
@@ -80,6 +83,8 @@ void GridGenerator::allocArrays_CoordNeighborGeo()
         cudaMemoryManager->cudaCopyNeighborWSB(level);
         cudaMemoryManager->cudaCopySP(level);
         cudaMemoryManager->cudaCopyCoord(level);
+        if(para->getIsBodyForce())
+            cudaMemoryManager->cudaCopyBodyForce(level);
 
         //std::cout << verifyNeighborIndices(level);
 	}
diff --git a/src/gpu/VirtualFluids_GPU/GPU/CudaMemoryManager.cpp b/src/gpu/VirtualFluids_GPU/GPU/CudaMemoryManager.cpp
index 8e281818608ee3f7a222b5890d10bcae9544e9bf..7074a222b7f90003d4db62dcded33fb582b5d2bb 100644
--- a/src/gpu/VirtualFluids_GPU/GPU/CudaMemoryManager.cpp
+++ b/src/gpu/VirtualFluids_GPU/GPU/CudaMemoryManager.cpp
@@ -5,6 +5,8 @@
 #include <math.h>
 
 #include <Parameter/Parameter.h>
+#include <PreCollisionInteractor/ActuatorLine.h>
+#include <PreCollisionInteractor/Probes/Probe.h>
 
 #include "Calculation/PorousMedia.h"
 
@@ -42,7 +44,7 @@ void CudaMemoryManager::cudaAllocCoord(int lev)
 	checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->coordX_SP      ), parameter->getParH(lev)->mem_size_real_SP  ));
 	checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->coordY_SP      ), parameter->getParH(lev)->mem_size_real_SP  ));
 	checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->coordZ_SP      ), parameter->getParH(lev)->mem_size_real_SP  ));
-	//Device (spinning ship + upsala)
+	//Device (spinning ship + uppsala)
 	checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->coordX_SP      ), parameter->getParH(lev)->mem_size_real_SP  ));
 	checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->coordY_SP      ), parameter->getParH(lev)->mem_size_real_SP  ));
 	checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->coordZ_SP      ), parameter->getParH(lev)->mem_size_real_SP  ));
@@ -69,7 +71,7 @@ void CudaMemoryManager::cudaAllocBodyForce(int lev)
 	checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->forceX_SP      ), parameter->getParH(lev)->mem_size_real_SP  ));
 	checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->forceY_SP      ), parameter->getParH(lev)->mem_size_real_SP  ));
 	checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->forceZ_SP      ), parameter->getParH(lev)->mem_size_real_SP  ));
-	//Device (spinning ship)
+	//Device
 	checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->forceX_SP      ), parameter->getParH(lev)->mem_size_real_SP  ));
 	checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->forceY_SP      ), parameter->getParH(lev)->mem_size_real_SP  ));
 	checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->forceZ_SP      ), parameter->getParH(lev)->mem_size_real_SP  ));
@@ -2643,10 +2645,306 @@ void CudaMemoryManager::cudaFree2ndOrderDerivitivesIsoTest(int lev)
     checkCudaErrors(cudaFreeHost(parameter->getParH(lev)->dzzUz));
     
 }
+////////////////////////////////////////////////////////////////////////////////////
+//  ActuatorLine
+///////////////////////////////////////////////////////////////////////////////
+
+void CudaMemoryManager::cudaAllocBladeRadii(ActuatorLine* actuatorLine)
+{
+    checkCudaErrors( cudaMallocHost((void**) &actuatorLine->bladeRadiiH, sizeof(real)*actuatorLine->getNBladeNodes()) );
+
+    checkCudaErrors( cudaMalloc((void**) &actuatorLine->bladeRadiiD, sizeof(real)*actuatorLine->getNBladeNodes()) );
+
+    setMemsizeGPU(sizeof(real)*actuatorLine->getNBladeNodes(), false);
+}
+
+void CudaMemoryManager::cudaCopyBladeRadiiHtoD(ActuatorLine* actuatorLine)
+{
+    checkCudaErrors( cudaMemcpy(actuatorLine->bladeRadiiD, actuatorLine->bladeRadiiH, sizeof(real)*actuatorLine->getNBladeNodes(), cudaMemcpyHostToDevice) );
+}
+
+void CudaMemoryManager::cudaCopyBladeRadiiDtoH(ActuatorLine* actuatorLine)
+{
+    checkCudaErrors( cudaMemcpy(actuatorLine->bladeRadiiH, actuatorLine->bladeRadiiD, sizeof(real)*actuatorLine->getNBladeNodes(), cudaMemcpyDeviceToHost) );
+}
+
+void CudaMemoryManager::cudaFreeBladeRadii(ActuatorLine* actuatorLine)
+{
+    checkCudaErrors( cudaFree(actuatorLine->bladeRadiiD) );
+    
+    checkCudaErrors( cudaFreeHost(actuatorLine->bladeRadiiH) );
+}
+
+void CudaMemoryManager::cudaAllocBladeCoords(ActuatorLine* actuatorLine)
+{
+    checkCudaErrors( cudaMallocHost((void**) &actuatorLine->bladeCoordsXH, sizeof(real)*actuatorLine->getNumberOfNodes()) );
+    checkCudaErrors( cudaMallocHost((void**) &actuatorLine->bladeCoordsYH, sizeof(real)*actuatorLine->getNumberOfNodes()) );
+    checkCudaErrors( cudaMallocHost((void**) &actuatorLine->bladeCoordsZH, sizeof(real)*actuatorLine->getNumberOfNodes()) );
+
+    checkCudaErrors( cudaMalloc((void**) &actuatorLine->bladeCoordsXD, sizeof(real)*actuatorLine->getNumberOfNodes()) );
+    checkCudaErrors( cudaMalloc((void**) &actuatorLine->bladeCoordsYD, sizeof(real)*actuatorLine->getNumberOfNodes()) );
+    checkCudaErrors( cudaMalloc((void**) &actuatorLine->bladeCoordsZD, sizeof(real)*actuatorLine->getNumberOfNodes()) );
+
+    setMemsizeGPU(3.f*actuatorLine->getNumberOfNodes(), false);
+}
+
+void CudaMemoryManager::cudaCopyBladeCoordsHtoD(ActuatorLine* actuatorLine)
+{
+    checkCudaErrors( cudaMemcpy(actuatorLine->bladeCoordsXD, actuatorLine->bladeCoordsXH, sizeof(real)*actuatorLine->getNumberOfNodes(), cudaMemcpyHostToDevice) );
+    checkCudaErrors( cudaMemcpy(actuatorLine->bladeCoordsYD, actuatorLine->bladeCoordsYH, sizeof(real)*actuatorLine->getNumberOfNodes(), cudaMemcpyHostToDevice) );
+    checkCudaErrors( cudaMemcpy(actuatorLine->bladeCoordsZD, actuatorLine->bladeCoordsZH, sizeof(real)*actuatorLine->getNumberOfNodes(), cudaMemcpyHostToDevice) );
+}
+
+void CudaMemoryManager::cudaCopyBladeCoordsDtoH(ActuatorLine* actuatorLine)
+{
+    checkCudaErrors( cudaMemcpy(actuatorLine->bladeCoordsXH, actuatorLine->bladeCoordsXD, sizeof(real)*actuatorLine->getNumberOfNodes(), cudaMemcpyDeviceToHost) );
+    checkCudaErrors( cudaMemcpy(actuatorLine->bladeCoordsYH, actuatorLine->bladeCoordsYD, sizeof(real)*actuatorLine->getNumberOfNodes(), cudaMemcpyDeviceToHost) );
+    checkCudaErrors( cudaMemcpy(actuatorLine->bladeCoordsZH, actuatorLine->bladeCoordsZD, sizeof(real)*actuatorLine->getNumberOfNodes(), cudaMemcpyDeviceToHost) );
+}
+
+void CudaMemoryManager::cudaFreeBladeCoords(ActuatorLine* actuatorLine)
+{
+    checkCudaErrors( cudaFree(actuatorLine->bladeCoordsXD) );
+    checkCudaErrors( cudaFree(actuatorLine->bladeCoordsYD) );
+    checkCudaErrors( cudaFree(actuatorLine->bladeCoordsZD) );
+
+    checkCudaErrors( cudaFreeHost(actuatorLine->bladeCoordsXH) );
+    checkCudaErrors( cudaFreeHost(actuatorLine->bladeCoordsYH) );
+    checkCudaErrors( cudaFreeHost(actuatorLine->bladeCoordsZH) );
+}
+
+void CudaMemoryManager::cudaAllocBladeIndices(ActuatorLine* actuatorLine)
+{
+    checkCudaErrors( cudaMallocHost((void**) &actuatorLine->bladeIndicesH, sizeof(uint)*actuatorLine->getNumberOfNodes()) );
+
+    checkCudaErrors( cudaMalloc((void**) &actuatorLine->bladeIndicesD, sizeof(uint)*actuatorLine->getNumberOfNodes()) );
+
+    setMemsizeGPU(sizeof(uint)*actuatorLine->getNumberOfNodes(), false);
+}
+
+void CudaMemoryManager::cudaCopyBladeIndicesHtoD(ActuatorLine* actuatorLine)
+{
+    checkCudaErrors( cudaMemcpy(actuatorLine->bladeIndicesD, actuatorLine->bladeIndicesH, sizeof(uint)*actuatorLine->getNumberOfNodes(), cudaMemcpyHostToDevice) );
+}
+
+void CudaMemoryManager::cudaFreeBladeIndices(ActuatorLine* actuatorLine)
+{
+    checkCudaErrors( cudaFree(actuatorLine->bladeIndicesD) );
+
+    checkCudaErrors( cudaFreeHost(actuatorLine->bladeIndicesH) );
+}
 
+void CudaMemoryManager::cudaAllocBladeVelocities(ActuatorLine* actuatorLine)
+{
+    checkCudaErrors( cudaMallocHost((void**) &actuatorLine->bladeVelocitiesXH, sizeof(real)*actuatorLine->getNumberOfNodes()) );
+    checkCudaErrors( cudaMallocHost((void**) &actuatorLine->bladeVelocitiesYH, sizeof(real)*actuatorLine->getNumberOfNodes()) );
+    checkCudaErrors( cudaMallocHost((void**) &actuatorLine->bladeVelocitiesZH, sizeof(real)*actuatorLine->getNumberOfNodes()) );
 
+    checkCudaErrors( cudaMalloc((void**) &actuatorLine->bladeVelocitiesXD, sizeof(real)*actuatorLine->getNumberOfNodes()) );
+    checkCudaErrors( cudaMalloc((void**) &actuatorLine->bladeVelocitiesYD, sizeof(real)*actuatorLine->getNumberOfNodes()) );
+    checkCudaErrors( cudaMalloc((void**) &actuatorLine->bladeVelocitiesZD, sizeof(real)*actuatorLine->getNumberOfNodes()) );
 
+    setMemsizeGPU(3.*sizeof(real)*actuatorLine->getNumberOfNodes(), false);
+}
 
+void CudaMemoryManager::cudaCopyBladeVelocitiesHtoD(ActuatorLine* actuatorLine)
+{
+    checkCudaErrors( cudaMemcpy(actuatorLine->bladeVelocitiesXD, actuatorLine->bladeVelocitiesXH, sizeof(real)*actuatorLine->getNumberOfNodes(), cudaMemcpyHostToDevice) );
+    checkCudaErrors( cudaMemcpy(actuatorLine->bladeVelocitiesYD, actuatorLine->bladeVelocitiesYH, sizeof(real)*actuatorLine->getNumberOfNodes(), cudaMemcpyHostToDevice) );
+    checkCudaErrors( cudaMemcpy(actuatorLine->bladeVelocitiesZD, actuatorLine->bladeVelocitiesZH, sizeof(real)*actuatorLine->getNumberOfNodes(), cudaMemcpyHostToDevice) );
+}
+
+void CudaMemoryManager::cudaCopyBladeVelocitiesDtoH(ActuatorLine* actuatorLine)
+{
+    checkCudaErrors( cudaMemcpy(actuatorLine->bladeVelocitiesXH, actuatorLine->bladeVelocitiesXD, sizeof(real)*actuatorLine->getNumberOfNodes(), cudaMemcpyDeviceToHost) );
+    checkCudaErrors( cudaMemcpy(actuatorLine->bladeVelocitiesYH, actuatorLine->bladeVelocitiesYD, sizeof(real)*actuatorLine->getNumberOfNodes(), cudaMemcpyDeviceToHost) );
+    checkCudaErrors( cudaMemcpy(actuatorLine->bladeVelocitiesZH, actuatorLine->bladeVelocitiesZD, sizeof(real)*actuatorLine->getNumberOfNodes(), cudaMemcpyDeviceToHost) );
+}
+
+void CudaMemoryManager::cudaFreeBladeVelocities(ActuatorLine* actuatorLine)
+{
+    checkCudaErrors( cudaFree(actuatorLine->bladeVelocitiesXD) );
+    checkCudaErrors( cudaFree(actuatorLine->bladeVelocitiesYD) );
+    checkCudaErrors( cudaFree(actuatorLine->bladeVelocitiesZD) );
+
+    checkCudaErrors( cudaFreeHost(actuatorLine->bladeVelocitiesXH) );
+    checkCudaErrors( cudaFreeHost(actuatorLine->bladeVelocitiesYH) );
+    checkCudaErrors( cudaFreeHost(actuatorLine->bladeVelocitiesZH) );
+}
+
+void CudaMemoryManager::cudaAllocBladeForces(ActuatorLine* actuatorLine)
+{   
+    checkCudaErrors( cudaMallocHost((void**) &actuatorLine->bladeForcesXH, sizeof(real)*actuatorLine->getNumberOfNodes()) );
+    checkCudaErrors( cudaMallocHost((void**) &actuatorLine->bladeForcesYH, sizeof(real)*actuatorLine->getNumberOfNodes()) );
+    checkCudaErrors( cudaMallocHost((void**) &actuatorLine->bladeForcesZH, sizeof(real)*actuatorLine->getNumberOfNodes()) );
+
+    checkCudaErrors( cudaMalloc((void**) &actuatorLine->bladeForcesXD, sizeof(real)*actuatorLine->getNumberOfNodes()) );
+    checkCudaErrors( cudaMalloc((void**) &actuatorLine->bladeForcesYD, sizeof(real)*actuatorLine->getNumberOfNodes()) );
+    checkCudaErrors( cudaMalloc((void**) &actuatorLine->bladeForcesZD, sizeof(real)*actuatorLine->getNumberOfNodes()) );
+
+    setMemsizeGPU(3.*sizeof(real)*actuatorLine->getNumberOfNodes(), false);
+}
+
+void CudaMemoryManager::cudaCopyBladeForcesHtoD(ActuatorLine* actuatorLine)
+{
+    checkCudaErrors( cudaMemcpy(actuatorLine->bladeForcesXD, actuatorLine->bladeForcesXH, sizeof(real)*actuatorLine->getNumberOfNodes(), cudaMemcpyHostToDevice) );
+    checkCudaErrors( cudaMemcpy(actuatorLine->bladeForcesYD, actuatorLine->bladeForcesYH, sizeof(real)*actuatorLine->getNumberOfNodes(), cudaMemcpyHostToDevice) );
+    checkCudaErrors( cudaMemcpy(actuatorLine->bladeForcesZD, actuatorLine->bladeForcesZH, sizeof(real)*actuatorLine->getNumberOfNodes(), cudaMemcpyHostToDevice) );
+}
+
+void CudaMemoryManager::cudaCopyBladeForcesDtoH(ActuatorLine* actuatorLine)
+{
+    checkCudaErrors( cudaMemcpy(actuatorLine->bladeForcesXH, actuatorLine->bladeForcesXD, sizeof(real)*actuatorLine->getNumberOfNodes(), cudaMemcpyDeviceToHost) );
+    checkCudaErrors( cudaMemcpy(actuatorLine->bladeForcesYH, actuatorLine->bladeForcesYD, sizeof(real)*actuatorLine->getNumberOfNodes(), cudaMemcpyDeviceToHost) );
+    checkCudaErrors( cudaMemcpy(actuatorLine->bladeForcesZH, actuatorLine->bladeForcesZD, sizeof(real)*actuatorLine->getNumberOfNodes(), cudaMemcpyDeviceToHost) );
+}
+
+void CudaMemoryManager::cudaFreeBladeForces(ActuatorLine* actuatorLine)
+{
+    checkCudaErrors( cudaFree(actuatorLine->bladeForcesXD) );
+    checkCudaErrors( cudaFree(actuatorLine->bladeForcesYD) );
+    checkCudaErrors( cudaFree(actuatorLine->bladeForcesZD) );
+
+    checkCudaErrors( cudaFreeHost(actuatorLine->bladeForcesXH) );
+    checkCudaErrors( cudaFreeHost(actuatorLine->bladeForcesYH) );
+    checkCudaErrors( cudaFreeHost(actuatorLine->bladeForcesZH) );
+}
+
+void CudaMemoryManager::cudaAllocSphereIndices(ActuatorLine* actuatorLine)
+{    
+    checkCudaErrors( cudaMallocHost((void**) &(actuatorLine->boundingSphereIndicesH), sizeof(int)*actuatorLine->getNumberOfIndices()));
+    checkCudaErrors( cudaMalloc((void**) &(actuatorLine->boundingSphereIndicesD), sizeof(int)*actuatorLine->getNumberOfIndices()));
+    setMemsizeGPU(sizeof(int)*actuatorLine->getNumberOfIndices(), false);
+}
+
+void CudaMemoryManager::cudaCopySphereIndicesHtoD(ActuatorLine* actuatorLine)
+{
+    checkCudaErrors( cudaMemcpy(actuatorLine->boundingSphereIndicesD, actuatorLine->boundingSphereIndicesH, sizeof(int)*actuatorLine->getNumberOfIndices(), cudaMemcpyHostToDevice) );
+}
+
+void CudaMemoryManager::cudaFreeSphereIndices(ActuatorLine* actuatorLine)
+{
+    checkCudaErrors( cudaFreeHost(actuatorLine->boundingSphereIndicesH) );
+    checkCudaErrors( cudaFree(actuatorLine->boundingSphereIndicesD) );
+}
+
+////////////////////////////////////////////////////////////////////////////////////
+//  Probe
+///////////////////////////////////////////////////////////////////////////////
+
+void CudaMemoryManager::cudaAllocProbeDistances(Probe* probe, int level)
+{
+    size_t tmp = sizeof(real)*probe->getProbeStruct(level)->nPoints;
+
+    checkCudaErrors( cudaMallocHost((void**) &probe->getProbeStruct(level)->distXH, tmp) );
+    checkCudaErrors( cudaMallocHost((void**) &probe->getProbeStruct(level)->distYH, tmp) );
+    checkCudaErrors( cudaMallocHost((void**) &probe->getProbeStruct(level)->distZH, tmp) );
+
+    checkCudaErrors( cudaMalloc    ((void**) &probe->getProbeStruct(level)->distXD, tmp) );
+    checkCudaErrors( cudaMalloc    ((void**) &probe->getProbeStruct(level)->distYD, tmp) );
+    checkCudaErrors( cudaMalloc    ((void**) &probe->getProbeStruct(level)->distZD, tmp) );
+    setMemsizeGPU(3.f*tmp, false);
+}
+void CudaMemoryManager::cudaCopyProbeDistancesHtoD(Probe* probe, int level)
+{
+    size_t tmp = sizeof(real)*probe->getProbeStruct(level)->nPoints;
+
+    checkCudaErrors( cudaMemcpy(probe->getProbeStruct(level)->distXD, probe->getProbeStruct(level)->distXH, tmp, cudaMemcpyHostToDevice) );
+    checkCudaErrors( cudaMemcpy(probe->getProbeStruct(level)->distYD, probe->getProbeStruct(level)->distYH, tmp, cudaMemcpyHostToDevice) );
+    checkCudaErrors( cudaMemcpy(probe->getProbeStruct(level)->distZD, probe->getProbeStruct(level)->distZH, tmp, cudaMemcpyHostToDevice) );
+}
+void CudaMemoryManager::cudaCopyProbeDistancesDtoH(Probe* probe, int level)
+{
+    size_t tmp = sizeof(real)*probe->getProbeStruct(level)->nPoints;
+
+    checkCudaErrors( cudaMemcpy(probe->getProbeStruct(level)->distXH, probe->getProbeStruct(level)->distXD, tmp, cudaMemcpyDeviceToHost) );
+    checkCudaErrors( cudaMemcpy(probe->getProbeStruct(level)->distYH, probe->getProbeStruct(level)->distXD, tmp, cudaMemcpyDeviceToHost) );
+    checkCudaErrors( cudaMemcpy(probe->getProbeStruct(level)->distZH, probe->getProbeStruct(level)->distXD, tmp, cudaMemcpyDeviceToHost) );
+}
+void CudaMemoryManager::cudaFreeProbeDistances(Probe* probe, int level)
+{
+    checkCudaErrors( cudaFreeHost(probe->getProbeStruct(level)->distXH) );
+    checkCudaErrors( cudaFreeHost(probe->getProbeStruct(level)->distYH) );
+    checkCudaErrors( cudaFreeHost(probe->getProbeStruct(level)->distZH) );
+
+    checkCudaErrors( cudaFree    (probe->getProbeStruct(level)->distXD) );
+    checkCudaErrors( cudaFree    (probe->getProbeStruct(level)->distYD) );
+    checkCudaErrors( cudaFree    (probe->getProbeStruct(level)->distZD) );
+}
+
+void CudaMemoryManager::cudaAllocProbeIndices(Probe* probe, int level)
+{
+    size_t tmp = sizeof(int)*probe->getProbeStruct(level)->nPoints;
+    checkCudaErrors( cudaMallocHost((void**) &probe->getProbeStruct(level)->pointIndicesH, tmp) );
+    checkCudaErrors( cudaMalloc    ((void**) &probe->getProbeStruct(level)->pointIndicesD, tmp) );
+    setMemsizeGPU(1.f*tmp, false);
+}
+void CudaMemoryManager::cudaCopyProbeIndicesHtoD(Probe* probe, int level)
+{
+    checkCudaErrors( cudaMemcpy(probe->getProbeStruct(level)->pointIndicesD, probe->getProbeStruct(level)->pointIndicesH, sizeof(int)*probe->getProbeStruct(level)->nPoints, cudaMemcpyHostToDevice) );
+}
+void CudaMemoryManager::cudaCopyProbeIndicesDtoH(Probe* probe, int level)
+{
+    checkCudaErrors( cudaMemcpy(probe->getProbeStruct(level)->pointIndicesH, probe->getProbeStruct(level)->pointIndicesD, sizeof(int)*probe->getProbeStruct(level)->nPoints, cudaMemcpyDeviceToHost) );
+}
+void CudaMemoryManager::cudaFreeProbeIndices(Probe* probe, int level)
+{
+    checkCudaErrors( cudaFreeHost(probe->getProbeStruct(level)->pointIndicesH) );
+    checkCudaErrors( cudaFree    (probe->getProbeStruct(level)->pointIndicesD) );
+}
+
+void CudaMemoryManager::cudaAllocProbeQuantityArray(Probe* probe, int level)
+{
+    size_t tmp = sizeof(real)*probe->getProbeStruct(level)->nArrays*probe->getProbeStruct(level)->nPoints;
+
+    checkCudaErrors( cudaMallocHost((void**) &probe->getProbeStruct(level)->quantitiesArrayH, tmp) );
+    checkCudaErrors( cudaMalloc    ((void**) &probe->getProbeStruct(level)->quantitiesArrayD, tmp) );
+    setMemsizeGPU(1.f*tmp, false);
+}
+
+void CudaMemoryManager::cudaCopyProbeQuantityArrayHtoD(Probe* probe, int level)
+{
+    checkCudaErrors( cudaMemcpy(probe->getProbeStruct(level)->quantitiesArrayD, probe->getProbeStruct(level)->quantitiesArrayH, probe->getProbeStruct(level)->nArrays*sizeof(real)*probe->getProbeStruct(level)->nPoints, cudaMemcpyHostToDevice) );
+}
+void CudaMemoryManager::cudaCopyProbeQuantityArrayDtoH(Probe* probe, int level)
+{
+    checkCudaErrors( cudaMemcpy(probe->getProbeStruct(level)->quantitiesArrayH, probe->getProbeStruct(level)->quantitiesArrayD, probe->getProbeStruct(level)->nArrays*sizeof(real)*probe->getProbeStruct(level)->nPoints, cudaMemcpyDeviceToHost) );
+}
+void CudaMemoryManager::cudaFreeProbeQuantityArray(Probe* probe, int level)
+{
+    checkCudaErrors( cudaFreeHost(probe->getProbeStruct(level)->quantitiesArrayH) );
+    checkCudaErrors( cudaFree    (probe->getProbeStruct(level)->quantitiesArrayD) );
+}
+
+void CudaMemoryManager::cudaAllocProbeQuantitiesAndOffsets(Probe* probe, int level)
+{
+    size_t tmpA = int(PostProcessingVariable::LAST)*sizeof(int);
+    size_t tmpQ = int(PostProcessingVariable::LAST)*sizeof(bool);
+    checkCudaErrors( cudaMallocHost((void**) &probe->getProbeStruct(level)->quantitiesH, tmpQ) );    
+    checkCudaErrors( cudaMalloc    ((void**) &probe->getProbeStruct(level)->quantitiesD, tmpQ) );
+    checkCudaErrors( cudaMallocHost((void**) &probe->getProbeStruct(level)->arrayOffsetsH, tmpA) );    
+    checkCudaErrors( cudaMalloc    ((void**) &probe->getProbeStruct(level)->arrayOffsetsD, tmpA) );
+    setMemsizeGPU(tmpA+tmpQ, false);
+}
+
+void CudaMemoryManager::cudaCopyProbeQuantitiesAndOffsetsHtoD(Probe* probe, int level)
+{    
+    checkCudaErrors( cudaMemcpy(probe->getProbeStruct(level)->quantitiesD, probe->getProbeStruct(level)->quantitiesH, int(PostProcessingVariable::LAST)*sizeof(bool), cudaMemcpyHostToDevice) );
+    checkCudaErrors( cudaMemcpy(probe->getProbeStruct(level)->arrayOffsetsD, probe->getProbeStruct(level)->arrayOffsetsH, int(PostProcessingVariable::LAST)*sizeof(int), cudaMemcpyHostToDevice) );
+}
+
+void CudaMemoryManager::cudaCopyProbeQuantitiesAndOffsetsDtoH(Probe* probe, int level)
+{
+    checkCudaErrors( cudaMemcpy(probe->getProbeStruct(level)->quantitiesH, probe->getProbeStruct(level)->quantitiesD, int(PostProcessingVariable::LAST)*sizeof(bool), cudaMemcpyDeviceToHost) );
+    checkCudaErrors( cudaMemcpy(probe->getProbeStruct(level)->arrayOffsetsH, probe->getProbeStruct(level)->arrayOffsetsD, int(PostProcessingVariable::LAST)*sizeof(int), cudaMemcpyDeviceToHost) );
+}
+void CudaMemoryManager::cudaFreeProbeQuantitiesAndOffsets(Probe* probe, int level)
+{
+    checkCudaErrors( cudaFreeHost(probe->getProbeStruct(level)->quantitiesH) );
+    checkCudaErrors( cudaFreeHost(probe->getProbeStruct(level)->arrayOffsetsH) );
+    checkCudaErrors( cudaFree    (probe->getProbeStruct(level)->quantitiesD) );
+    checkCudaErrors( cudaFree    (probe->getProbeStruct(level)->arrayOffsetsD) );
+}
 
 
 
diff --git a/src/gpu/VirtualFluids_GPU/GPU/CudaMemoryManager.h b/src/gpu/VirtualFluids_GPU/GPU/CudaMemoryManager.h
index 4853205f510348a954fbc8dcdaed72385a30d559..1d1311bf18da4538b323a99a0c8a4c341dda5e62 100644
--- a/src/gpu/VirtualFluids_GPU/GPU/CudaMemoryManager.h
+++ b/src/gpu/VirtualFluids_GPU/GPU/CudaMemoryManager.h
@@ -18,6 +18,8 @@
 
 class Parameter;
 class PorousMedia;
+class ActuatorLine;
+class Probe;
 
 class VIRTUALFLUIDS_GPU_EXPORT CudaMemoryManager
 {
@@ -322,13 +324,54 @@ public:
     void cudaCopyProcessNeighborADZIndex(int lev, unsigned int processNeighbor);
     void cudaFreeProcessNeighborADZ(int lev, unsigned int processNeighbor);
 
-    
-    
-    
-    
-    
-    
-    
+    void cudaAllocBladeRadii(ActuatorLine* actuatorLine);
+    void cudaCopyBladeRadiiHtoD(ActuatorLine* actuatorLine);
+    void cudaCopyBladeRadiiDtoH(ActuatorLine* actuatorLine);
+    void cudaFreeBladeRadii(ActuatorLine* actuatorLine);
+
+    void cudaAllocBladeCoords(ActuatorLine* actuatorLine);
+    void cudaCopyBladeCoordsHtoD(ActuatorLine* actuatorLine);
+    void cudaCopyBladeCoordsDtoH(ActuatorLine* actuatorLine);
+    void cudaFreeBladeCoords(ActuatorLine* actuatorLine);
+
+    void cudaAllocBladeIndices(ActuatorLine* actuatorLine);
+    void cudaCopyBladeIndicesHtoD(ActuatorLine* actuatorLine);
+    void cudaFreeBladeIndices(ActuatorLine* actuatorLine);
+
+    void cudaAllocBladeVelocities(ActuatorLine* actuatorLine);
+    void cudaCopyBladeVelocitiesHtoD(ActuatorLine* actuatorLine);
+    void cudaCopyBladeVelocitiesDtoH(ActuatorLine* actuatorLine);
+    void cudaFreeBladeVelocities(ActuatorLine* actuatorLine);
+
+    void cudaAllocBladeForces(ActuatorLine* actuatorLine);
+    void cudaCopyBladeForcesHtoD(ActuatorLine* actuatorLine);
+    void cudaCopyBladeForcesDtoH(ActuatorLine* actuatorLine);
+    void cudaFreeBladeForces(ActuatorLine* actuatorLine);
+
+    void cudaAllocSphereIndices(ActuatorLine* actuatorLine);
+    void cudaCopySphereIndicesHtoD(ActuatorLine* actuatorLine);
+    void cudaFreeSphereIndices(ActuatorLine* actuatorLine);
+
+    void cudaAllocProbeDistances(Probe* probe, int level);
+    void cudaCopyProbeDistancesHtoD(Probe* probe, int level);
+    void cudaCopyProbeDistancesDtoH(Probe* probe, int level);
+    void cudaFreeProbeDistances(Probe* probe, int level);
+
+    void cudaAllocProbeIndices(Probe* probe, int level);
+    void cudaCopyProbeIndicesHtoD(Probe* probe, int level);
+    void cudaCopyProbeIndicesDtoH(Probe* probe, int level);
+    void cudaFreeProbeIndices(Probe* probe, int level);
+
+    void cudaAllocProbeQuantityArray(Probe* probe, int level);
+    void cudaCopyProbeQuantityArrayHtoD(Probe* probe, int level);
+    void cudaCopyProbeQuantityArrayDtoH(Probe* probe, int level);
+    void cudaFreeProbeQuantityArray(Probe* probe, int level);
+    
+    void cudaAllocProbeQuantitiesAndOffsets(Probe* probe, int level);
+    void cudaCopyProbeQuantitiesAndOffsetsHtoD(Probe* probe, int level);
+    void cudaCopyProbeQuantitiesAndOffsetsDtoH(Probe* probe, int level);
+    void cudaFreeProbeQuantitiesAndOffsets(Probe* probe, int level);
+
 private:
     CudaMemoryManager(std::shared_ptr<Parameter> parameter);
     CudaMemoryManager(const CudaMemoryManager&);
diff --git a/src/gpu/VirtualFluids_GPU/GPU/GeometryUtils.h b/src/gpu/VirtualFluids_GPU/GPU/GeometryUtils.h
new file mode 100644
index 0000000000000000000000000000000000000000..da438ab0fdb04c8a83bd37700b4e4735970bcd7d
--- /dev/null
+++ b/src/gpu/VirtualFluids_GPU/GPU/GeometryUtils.h
@@ -0,0 +1,203 @@
+#ifndef _GEOMETRYUTILS_H
+#define _GEOMETRYUTILS_H
+
+__inline__ __host__ __device__ void getNeighborIndicesOfBSW(  uint k, //index of BSW node
+                                        uint &ke, uint &kn, uint &kt, uint &kne, uint &kte,uint &ktn, uint &ktne,
+                                        uint* neighborX, uint* neighborY, uint* neighborZ)
+{
+    ke   = neighborX[k];
+    kn   = neighborY[k];
+    kt   = neighborZ[k];
+    kne  = neighborY[ke];
+    kte  = neighborZ[ke];
+    ktn  = neighborZ[kn];
+    ktne = neighborX[ktn];
+}
+
+__inline__ __host__ __device__ uint findNearestCellBSW(uint index, 
+                                              real* coordsX, real* coordsY, real* coordsZ, 
+                                              real posX, real posY, real posZ, 
+                                              uint* neighborsX, uint* neighborsY, uint* neighborsZ, uint* neighborsWSB)
+{
+    uint new_index = index;
+
+    while(coordsX[new_index] > posX && coordsY[new_index] > posY && coordsZ[new_index] > posZ ){ new_index = max(1, neighborsWSB[new_index]);}
+
+    while(coordsX[new_index] > posX && coordsY[new_index] > posY ){ new_index = max(1, neighborsZ[neighborsWSB[new_index]]);}
+    while(coordsX[new_index] > posX && coordsZ[new_index] > posZ ){ new_index = max(1, neighborsY[neighborsWSB[new_index]]);}
+    while(coordsY[new_index] > posY && coordsZ[new_index] > posZ ){ new_index = max(1, neighborsX[neighborsWSB[new_index]]);}
+
+    while(coordsX[new_index] > posX){ new_index = max(1, neighborsY[neighborsZ[neighborsWSB[new_index]]]);}
+    while(coordsY[new_index] > posY){ new_index = max(1, neighborsX[neighborsZ[neighborsWSB[new_index]]]);}
+    while(coordsZ[new_index] > posZ){ new_index = max(1, neighborsX[neighborsY[neighborsWSB[new_index]]]);}
+
+    while(coordsX[new_index] < posX){ new_index = max(1, neighborsX[new_index]);}
+    while(coordsY[new_index] < posY){ new_index = max(1, neighborsY[new_index]);}
+    while(coordsZ[new_index] < posZ){ new_index = max(1, neighborsZ[new_index]);}
+
+    return neighborsWSB[new_index];
+}
+
+__inline__ __host__ __device__ void getInterpolationWeights(real &dW, real &dE, real &dN, real &dS, real &dT, real &dB,
+                                        real tmpX, real tmpY, real tmpZ)
+{
+    dW = tmpX;      
+    dE = 1.f - dW;        
+    dS = tmpY;    
+    dN = 1.f - dS;      
+    dB = tmpZ;         
+    dT = 1.f - dB;     
+}
+
+__inline__ __host__ __device__ real trilinearInterpolation( real dW, real dE, real dN, real dS, real dT, real dB,
+                                        uint k,  uint ke, uint kn, uint kt, uint kne, uint kte, uint ktn, uint ktne,
+                                        real* quantity )
+{
+    real interpolatedValue = (            dE*dN*dT*quantity[k]    + dW*dN*dT*quantity[ke]
+                                        + dE*dS*dT*quantity[kn]   + dW*dS*dT*quantity[kne]
+                                        + dE*dN*dB*quantity[kt]   + dW*dN*dB*quantity[kte]
+                                        + dE*dS*dB*quantity[ktn]  + dW*dS*dB*quantity[ktne] );
+    return interpolatedValue;
+}
+
+__inline__ __host__ __device__ void translate2D(real &posX, real &posY, real &newPosX, real &newPosY, real &translationX, real &translationY)
+{
+    newPosX = posX + translationX;
+    newPosY = posY + translationY;
+}
+
+__inline__ __host__ __device__ void invTranslate2D(real &posX, real &posY, real &newPosX, real &newPosY, real &translationX, real &translationY)
+{
+    newPosX = posX - translationX;
+    newPosY = posY - translationY;
+}
+
+__inline__ __host__ __device__ void translate3D(real &posX, real &posY, real &posZ, real &newPosX, real &newPosY, real &newPosZ, real &translationX, real &translationY, real &translationZ)
+{
+    newPosX = posX + translationX;
+    newPosY = posY + translationY;
+    newPosZ = posZ + translationZ;
+}
+
+__inline__ __host__ __device__ void invTranslate3D(real &posX, real &posY, real &posZ, real &newPosX, real &newPosY, real &newPosZ, real &translationX, real &translationY, real &translationZ)
+{
+    newPosX = posX - translationX;
+    newPosY = posY - translationY;
+    newPosZ = posZ - translationZ;
+}
+
+__inline__ __host__ __device__ void rotate2D(real &angle, real &posX, real &posY, real &newPosX, real &newPosY)
+{
+    newPosX = posX*cos(angle) - posY*sin(angle);
+    newPosY = posX*sin(angle) + posY*cos(angle);  
+}
+
+__inline__ __host__ __device__ void rotate2D(real &angle, real &posX, real &posY, real &newPosX, real &newPosY, real &originX, real &originY)
+{
+    real tmpX, tmpY;
+    invTranslate2D(posX, posY, newPosX, newPosY, originX, originY);
+    rotate2D(angle, newPosX, newPosY, tmpX, tmpY); 
+    translate2D(tmpX, tmpY, newPosX, newPosY, originX, originY);
+}
+
+__inline__ __host__ __device__ void invRotate2D(real &angle, real &posX, real &posY, real &newPosX, real &newPosY)
+{
+    newPosX =  posX*cos(angle) + posY*sin(angle);
+    newPosY = -posX*sin(angle) + posY*cos(angle);  
+}
+
+__inline__ __host__ __device__ void invRotate2D(real &angle, real &posX, real &posY, real &newPosX, real &newPosY, real &originX, real &originY)
+{
+    real tmpX, tmpY;
+    invTranslate2D(posX, posY, newPosX, newPosY, originX, originY);
+    invRotate2D(angle, newPosX, newPosY, tmpX, tmpY); 
+    translate2D(tmpX, tmpY, newPosX, newPosY, originX, originY);
+}
+
+__inline__ __host__ __device__ void rotateAboutX3D(real &angle, real &posX, real &posY, real &posZ, real &newPosX, real &newPosY, real &newPosZ)
+{
+    newPosX = posX;
+    rotate2D(angle, posY, posZ, newPosY, newPosZ);
+}
+
+__inline__ __host__ __device__ void rotateAboutX3D(real &angle, real &posX, real &posY, real &posZ, real &newPosX, real &newPosY, real &newPosZ, real &originX, real &originY, real &originZ)
+{
+    real tmpX, tmpY, tmpZ;
+    invTranslate3D(posX, posY, posZ, newPosX, newPosY, newPosZ, originX, originY, originZ);
+    rotateAboutX3D(angle, newPosX, newPosY, newPosZ, tmpX, tmpY, tmpZ);
+    translate3D(tmpX, tmpY, tmpZ, newPosX, newPosY, newPosZ, originX, originY, originZ);
+}
+
+__inline__ __host__ __device__ void invRotateAboutX3D(real &angle, real &posX, real &posY, real &posZ, real &newPosX, real &newPosY, real &newPosZ)
+{
+    newPosX = posX;
+    invRotate2D(angle, posY, posZ, newPosY, newPosZ);
+}
+
+__inline__ __host__ __device__ void invRotateAboutX3D(real &angle, real &posX, real &posY, real &posZ, real &newPosX, real &newPosY, real &newPosZ, real &originX, real &originY, real &originZ)
+{
+    real tmpX, tmpY, tmpZ;
+    invTranslate3D(posX, posY, posZ, newPosX, newPosY, newPosZ, originX, originY, originZ);
+    invRotateAboutX3D(angle, newPosX, newPosY, newPosZ, tmpX, tmpY, tmpZ);
+    translate3D(tmpX, tmpY, tmpZ, newPosX, newPosY, newPosZ, originX, originY, originZ);
+}
+
+__inline__ __host__ __device__ void rotateAboutY3D(real &angle, real &posX, real &posY, real &posZ, real &newPosX, real &newPosY, real &newPosZ)
+{    
+    newPosY =  posY;
+    rotate2D(angle, posX, posZ, newPosX, newPosZ);
+}
+
+__inline__ __host__ __device__ void rotateAboutY3D(real &angle, real &posX, real &posY, real &posZ, real &newPosX, real &newPosY, real &newPosZ, real &originX, real &originY, real &originZ)
+{
+    real tmpX, tmpY, tmpZ;
+    invTranslate3D(posX, posY, posZ, newPosX, newPosY, newPosZ, originX, originY, originZ);
+    rotateAboutY3D(angle, newPosX, newPosY, newPosZ, tmpX, tmpY, tmpZ);
+    translate3D(tmpX, tmpY, tmpZ, newPosX, newPosY, newPosZ, originX, originY, originZ);
+}
+
+__inline__ __host__ __device__ void invRotateAboutY3D(real &angle, real &posX, real &posY, real &posZ, real &newPosX, real &newPosY, real &newPosZ)
+{
+    newPosY =  posY;
+    invRotate2D(angle, posX, posZ, newPosX, newPosZ);
+}
+
+__inline__ __host__ __device__ void invRotateAboutY3D(real &angle, real &posX, real &posY, real &posZ, real &newPosX, real &newPosY, real &newPosZ, real &originX, real &originY, real &originZ)
+{
+    real tmpX, tmpY, tmpZ;
+    invTranslate3D(posX, posY, posZ, newPosX, newPosY, newPosZ, originX, originY, originZ);
+    invRotateAboutY3D(angle, newPosX, newPosY, newPosZ, tmpX, tmpY, tmpZ);
+    translate3D(tmpX, tmpY, tmpZ, newPosX, newPosY, newPosZ, originX, originY, originZ);
+}
+
+
+__inline__ __host__ __device__ void rotateAboutZ3D(real &angle, real &posX, real &posY, real &posZ, real &newPosX, real &newPosY, real &newPosZ)
+{
+    newPosZ = posZ;
+    rotate2D(angle, posX, posY, newPosX, newPosY);
+}
+
+__inline__ __host__ __device__ void rotateAboutZ3D(real &angle, real &posX, real &posY, real &posZ, real &newPosX, real &newPosY, real &newPosZ, real &originX, real &originY, real &originZ)
+{
+    real tmpX, tmpY, tmpZ;
+    invTranslate3D(posX, posY, posZ, newPosX, newPosY, newPosZ, originX, originY, originZ);
+    rotateAboutZ3D(angle, newPosX, newPosY, newPosZ, tmpX, tmpY, tmpZ);
+    translate3D(tmpX, tmpY, tmpZ, newPosX, newPosY, newPosZ, originX, originY, originZ);
+}
+
+__inline__ __host__ __device__ void invRotateAboutZ3D(real &angle, real &posX, real &posY, real &posZ, real &newPosX, real &newPosY, real &newPosZ)
+{
+    newPosZ = posZ;
+    invRotate2D(angle, posX, posY, newPosX, newPosY);
+}
+
+__inline__ __host__ __device__ void invRotateAboutZ3D(real &angle, real &posX, real &posY, real &posZ, real &newPosX, real &newPosY, real &newPosZ, real &originX, real &originY, real &originZ)
+{
+    real tmpX, tmpY, tmpZ;
+    invTranslate3D(posX, posY, posZ, newPosX, newPosY, newPosZ, originX, originY, originZ);
+    invRotateAboutZ3D(angle, newPosX, newPosY, newPosZ, tmpX, tmpY, tmpZ);
+    translate3D(tmpX, tmpY, tmpZ, newPosX, newPosY, newPosZ, originX, originY, originZ);
+}
+
+
+#endif
\ No newline at end of file
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17chim/CumulantK17CompChim.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17chim/CumulantK17CompChim.cu
index 38df36655e3705d26d3ee9614970cde9d2fdc87b..b97b2778440b7d1ab32b3d0e9bb002a48df03134 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17chim/CumulantK17CompChim.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17chim/CumulantK17CompChim.cu
@@ -37,7 +37,11 @@ void CumulantK17CompChim::run()
 		para->getParD(level)->d0SP.f[0],
 		para->getParD(level)->size_Mat_SP,
 		level,
+		para->getIsBodyForce(),
 		para->getForcesDev(),
+		para->getParD(level)->forceX_SP,
+		para->getParD(level)->forceY_SP,
+		para->getParD(level)->forceZ_SP,
         para->getQuadricLimitersDev(),
 		para->getParD(level)->evenOrOdd);
 	getLastCudaError("LB_Kernel_CumulantK17CompChim execution failed");
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17chim/CumulantK17CompChim_Device.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17chim/CumulantK17CompChim_Device.cu
index 8c95f96730293446d0f066661e706270171270a8..2b26cd2b608fb2237002d9471f518a89d78ca32b 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17chim/CumulantK17CompChim_Device.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17chim/CumulantK17CompChim_Device.cu
@@ -48,7 +48,11 @@ extern "C" __global__ void LB_Kernel_CumulantK17CompChim(
 	real* distributions,
 	int size_Mat,
 	int level,
+    bool bodyForce,
 	real* forces,
+    real* bodyForceX,
+    real* bodyForceY,
+    real* bodyForceZ,
 	real* quadricLimiters,
 	bool isEvenTimestep)
 {
@@ -210,12 +214,25 @@ extern "C" __global__ void LB_Kernel_CumulantK17CompChim(
         for (size_t i = 1; i <= level; i++) {
             factor *= c2o1;
         }
-        real fx = forces[0] / factor;
-        real fy = forces[1] / factor;
-        real fz = forces[2] / factor;
-        vvx += fx * c1o2;
-        vvy += fy * c1o2;
-        vvz += fz * c1o2;
+        
+        real fx = forces[0];
+        real fy = forces[1];
+        real fz = forces[2];
+
+        if( bodyForce ){
+            fx += bodyForceX[k];
+            fy += bodyForceY[k];
+            fz += bodyForceZ[k];
+
+            //Reset body force
+            bodyForceX[k] = 0.0f;
+            bodyForceY[k] = 0.0f;
+            bodyForceZ[k] = 0.0f;
+        }
+        
+        vvx += fx * c1o2 / factor;
+        vvy += fy * c1o2 / factor;
+        vvz += fz * c1o2 / factor;
         ////////////////////////////////////////////////////////////////////////////////////
         // calculate the square of velocities for this lattice node
         real vx2 = vvx * vvx;
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17chim/CumulantK17CompChim_Device.cuh b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17chim/CumulantK17CompChim_Device.cuh
index 557bc08b17d36763fdb5f94980dd0e918fa29ac1..1d42d65f020dd7393321498666a298630883f6ad 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17chim/CumulantK17CompChim_Device.cuh
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17chim/CumulantK17CompChim_Device.cuh
@@ -13,7 +13,11 @@ extern "C" __global__ void LB_Kernel_CumulantK17CompChim(
 	real* distributions,
 	int size_Mat,
 	int level,
+	bool bodyForce,
 	real* forces,
+	real* bodyForceX,
+	real* bodyForceY,
+	real* bodyForceZ,
 	real* quadricLimiters,
 	bool isEvenTimestep);
 #endif
diff --git a/src/gpu/VirtualFluids_GPU/LBM/Simulation.cpp b/src/gpu/VirtualFluids_GPU/LBM/Simulation.cpp
index 62d3a944176617996abd90ce6ba9b19973afeb04..737b4edad35eee4829065b89fc1b12eb2c43ef4d 100644
--- a/src/gpu/VirtualFluids_GPU/LBM/Simulation.cpp
+++ b/src/gpu/VirtualFluids_GPU/LBM/Simulation.cpp
@@ -136,6 +136,13 @@ void Simulation::init(SPtr<Parameter> para, SPtr<GridProvider> gridProvider, std
    gridProvider->allocArrays_BoundaryQs();
    gridProvider->allocArrays_OffsetScale();
 
+	for( SPtr<PreCollisionInteractor> actuator: para->getActuators()){
+		actuator->init(para.get(), gridProvider.get(), cudaManager.get());
+	}
+
+	for( SPtr<PreCollisionInteractor> probe: para->getProbes()){
+		probe->init(para.get(), gridProvider.get(), cudaManager.get());
+	}
 
    //////////////////////////////////////////////////////////////////////////
    //Kernel init
@@ -1285,4 +1292,14 @@ void Simulation::free()
 			cudaManager->cudaFreeGeomNormals(lev);
 		}
 	}
-}
\ No newline at end of file
+	//////////////////////////////////////////////////////////////////////////
+	//PreCollisionInteractors
+	for( SPtr<PreCollisionInteractor> actuator: para->getActuators()){
+		actuator->free(para.get(), cudaManager.get());
+	}
+
+	for( SPtr<PreCollisionInteractor> probe: para->getProbes()){
+		probe->free(para.get(), cudaManager.get());
+	}
+	//////////////////////////////////////////////////////////////////////////
+}
diff --git a/src/gpu/VirtualFluids_GPU/Parameter/Parameter.cpp b/src/gpu/VirtualFluids_GPU/Parameter/Parameter.cpp
index f394ea968a7362264fa77071d8c90b26b4d01348..2aa00708e5a440f99fe341374dd668a5e764bbdb 100644
--- a/src/gpu/VirtualFluids_GPU/Parameter/Parameter.cpp
+++ b/src/gpu/VirtualFluids_GPU/Parameter/Parameter.cpp
@@ -1489,6 +1489,18 @@ void Parameter::setADKernel(std::string adKernel)
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 
 
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//add-methods
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+void Parameter::addActuator(SPtr<PreCollisionInteractor> actuator)
+{
+	actuators.push_back(actuator);
+}
+void Parameter::addProbe(SPtr<PreCollisionInteractor> probe)
+{
+	probes.push_back(probe);
+}
+
 
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 //get-methods
@@ -1747,6 +1759,18 @@ real Parameter::getPressRatio()
 {
 	return ic.delta_press;
 }
+real Parameter::getTimeRatio()
+{
+	return this->getViscosityRatio()*pow(this->getVelocityRatio(),-2);
+}
+real Parameter::getLengthRatio()
+{
+	return this->getViscosityRatio()/this->getVelocityRatio();
+}
+real Parameter::getForceRatio()
+{
+	return this->getDensityRatio()*pow(this->getViscosityRatio(),2);
+}
 real Parameter::getRealX()
 {
 	return ic.RealX;
@@ -1887,6 +1911,14 @@ TempPressforBoundaryConditions* Parameter::getTempPressD()
 {
 	return this->TempPressD;
 }
+std::vector<SPtr<PreCollisionInteractor>> Parameter::getActuators()
+{
+	return actuators;
+}
+std::vector<SPtr<PreCollisionInteractor>> Parameter::getProbes()
+{
+	return probes;
+}
 //unsigned int Parameter::getkInflowQ()
 //{
 //   return this->kInflowQ;
diff --git a/src/gpu/VirtualFluids_GPU/Parameter/Parameter.h b/src/gpu/VirtualFluids_GPU/Parameter/Parameter.h
index 60beb9f637644d0654732b1f2ece5df43ae1f456..06161934b52e35c2a0932277a651bbdc102424a8 100644
--- a/src/gpu/VirtualFluids_GPU/Parameter/Parameter.h
+++ b/src/gpu/VirtualFluids_GPU/Parameter/Parameter.h
@@ -40,7 +40,7 @@
 
 #include "LBM/D3Q27.h"
 #include "LBM/LB.h"
-
+#include "PreCollisionInteractor/PreCollisionInteractor.h"
 
 #include "VirtualFluids_GPU_export.h"
 
@@ -524,6 +524,11 @@ public:
 
     void setADKernel(std::string adKernel);
 
+    //adder
+
+	void addActuator(SPtr<PreCollisionInteractor> actuator);
+	void addProbe(SPtr<PreCollisionInteractor> probes);
+
     // getter
     double *getForcesDouble();
     real *getForcesHost();
@@ -652,7 +657,10 @@ public:
     real getViscosityRatio();
     real getVelocityRatio();
     real getDensityRatio();
-    real getPressRatio();
+    real getPressRatio();    
+    real getTimeRatio();
+    real getLengthRatio();
+    real getForceRatio();    
     real getRealX();
     real getRealY();
     real getRe();
@@ -679,6 +687,8 @@ public:
     TempVelforBoundaryConditions *getTempVelD();
     TempPressforBoundaryConditions *getTempPressH();
     TempPressforBoundaryConditions *getTempPressD();
+    std::vector<SPtr<PreCollisionInteractor>> getActuators();
+    std::vector<SPtr<PreCollisionInteractor>> getProbes();
     unsigned int getTimeDoCheckPoint();
     unsigned int getTimeDoRestart();
     bool getDoCheckPoint();
@@ -763,7 +773,7 @@ private:
     bool calcCp { false };
     bool writeVeloASCII { false };
     bool calcPlaneConc { false };
-    bool isBodyForce;
+    bool isBodyForce { false };
     int diffMod {27};
     int maxlevel {0};
     int coarse {0};
@@ -809,6 +819,10 @@ private:
 	real angularVelocity;
     unsigned int startTurn;
 
+    // PreCollisionInteractors //////////////
+    std::vector<SPtr<PreCollisionInteractor>> actuators;
+	std::vector<SPtr<PreCollisionInteractor>> probes;
+
     // Step of Ensight writing//
     unsigned int stepEnsight;
 
diff --git a/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/ActuatorLine.cu b/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/ActuatorLine.cu
new file mode 100644
index 0000000000000000000000000000000000000000..b07156f2b0fae7bca73a75127bdd61e0e73cd48f
--- /dev/null
+++ b/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/ActuatorLine.cu
@@ -0,0 +1,383 @@
+#include "ActuatorLine.h"
+
+#include <cuda.h>
+#include <cuda_runtime.h>
+#include <helper_cuda.h>
+
+#include "Kernel/Utilities/CudaGrid.h"
+#include "lbm/constants/NumericConstants.h"
+#include "VirtualFluids_GPU/GPU/GeometryUtils.h"
+
+#include "Parameter/Parameter.h"
+#include "DataStructureInitializer/GridProvider.h"
+#include "GPU/CudaMemoryManager.h"
+
+__host__ __device__ __inline__ real calcGaussian3D(real posX, real posY, real posZ, real destX, real destY, real destZ, real epsilon)
+{
+    real distX = destX-posX;
+    real distY = destY-posY;
+    real distZ = destZ-posZ;
+    real dist = sqrt(distX*distX+distY*distY+distZ*distZ);
+    return pow(epsilon,-3)*pow(vf::lbm::constant::cPi,-1.5f)*exp(-pow(dist/epsilon,2));
+}
+
+
+__global__ void interpolateVelocities(real* gridCoordsX, real* gridCoordsY, real* gridCoordsZ, 
+                                      uint* neighborsX, uint* neighborsY, uint* neighborsZ, 
+                                      uint* neighborsWSB, 
+                                      real* vx, real* vy, real* vz, 
+                                      uint numberOfIndices, 
+                                      real* bladeCoordsX, real* bladeCoordsY, real* bladeCoordsZ, 
+                                      real* bladeVelocitiesX, real* bladeVelocitiesY, real* bladeVelocitiesZ, 
+                                      uint* bladeIndices, uint numberOfNodes)
+{
+    const uint x = threadIdx.x; 
+    const uint y = blockIdx.x;
+    const uint z = blockIdx.y;
+
+    const uint nx = blockDim.x;
+    const uint ny = gridDim.x;
+
+    const uint node = nx*(ny*z + y) + x;
+
+    if(node>=numberOfNodes) return;
+
+    real bladePosX = bladeCoordsX[node];
+    real bladePosY = bladeCoordsY[node];
+    real bladePosZ = bladeCoordsZ[node];
+
+    uint old_index = bladeIndices[node];
+
+    uint k, ke, kn, kt;
+    uint kne, kte, ktn, ktne;
+
+    k = findNearestCellBSW(old_index, 
+                           gridCoordsX, gridCoordsY, gridCoordsZ, 
+                           bladePosX, bladePosY, bladePosZ, 
+                           neighborsX, neighborsY, neighborsZ, neighborsWSB);
+        
+    bladeIndices[node] = k;
+
+    getNeighborIndicesOfBSW(k, ke, kn, kt, kne, kte, ktn, ktne, neighborsX, neighborsY, neighborsZ);
+
+    real dW, dE, dN, dS, dT, dB;
+
+    real invDeltaX = 1.f/(gridCoordsX[ktne]-gridCoordsX[k]);
+    real distX = invDeltaX*(gridCoordsX[ktne]-bladePosX);
+    real distY = invDeltaX*(gridCoordsY[ktne]-bladePosY);
+    real distZ = invDeltaX*(gridCoordsZ[ktne]-bladePosZ);
+
+    getInterpolationWeights(dW, dE, dN, dS, dT, dB, 
+                            distX, distY, distZ);
+
+    bladeVelocitiesX[node] = trilinearInterpolation(dW, dE, dN, dS, dT, dB, k, ke, kn, kt, kne, kte, ktn, ktne, vx);
+    bladeVelocitiesY[node] = trilinearInterpolation(dW, dE, dN, dS, dT, dB, k, ke, kn, kt, kne, kte, ktn, ktne, vy);
+    bladeVelocitiesZ[node] = trilinearInterpolation(dW, dE, dN, dS, dT, dB, k, ke, kn, kt, kne, kte, ktn, ktne, vz);
+
+}
+
+
+__global__ void applyBodyForces(real* gridCoordsX, real* gridCoordsY, real* gridCoordsZ,
+                           real* gridForcesX, real* gridForcesY, real* gridForcesZ, 
+                           uint* gridIndices, uint numberOfIndices, 
+                           real* bladeCoordsX, real* bladeCoordsY, real* bladeCoordsZ, 
+                           real* bladeForcesX, real* bladeForcesY,real* bladeForcesZ,
+                           real* bladeRadii,
+                           real radius,
+                           uint nBlades, uint nBladeNodes,
+                           real epsilon, real delta_x)
+{
+    const uint x = threadIdx.x; 
+    const uint y = blockIdx.x;
+    const uint z = blockIdx.y;
+
+    const uint nx = blockDim.x;
+    const uint ny = gridDim.x;
+
+    const uint index = nx*(ny*z + y) + x;
+
+    if(index>=numberOfIndices) return;
+
+    int gridIndex = gridIndices[index];
+
+    real posX = gridCoordsX[gridIndex];
+    real posY = gridCoordsY[gridIndex];
+    real posZ = gridCoordsZ[gridIndex];
+
+    real fXYZ_X = 0.0f;
+    real fXYZ_Y = 0.0f;
+    real fXYZ_Z = 0.0f;
+
+    real eta = 0.0f;
+
+    real delta_x_cubed = pow(delta_x,3);
+
+    for( uint blade=0; blade<nBlades; blade++)
+    {    
+        real last_r = 0.0f;
+        real r = 0.0f;
+
+        for( uint bladeNode=0; bladeNode<nBladeNodes; bladeNode++)
+        {
+            int node = bladeNode+blade*nBladeNodes;
+            eta = calcGaussian3D(posX, posY, posZ, bladeCoordsX[node], bladeCoordsY[node], bladeCoordsZ[node], epsilon)*delta_x_cubed;
+            r = bladeRadii[bladeNode];
+
+            fXYZ_X += bladeForcesX[node]*(r-last_r)*eta;
+            fXYZ_Y += bladeForcesY[node]*(r-last_r)*eta;
+            fXYZ_Z += bladeForcesZ[node]*(r-last_r)*eta;
+
+            last_r = r;
+        }    
+
+        fXYZ_X += bladeForcesX[nBladeNodes-1]*(radius-last_r)*eta;
+        fXYZ_Y += bladeForcesY[nBladeNodes-1]*(radius-last_r)*eta;
+        fXYZ_Z += bladeForcesZ[nBladeNodes-1]*(radius-last_r)*eta;
+    }
+
+    gridForcesX[gridIndex] = fXYZ_X;
+    gridForcesY[gridIndex] = fXYZ_Y;
+    gridForcesZ[gridIndex] = fXYZ_Z;
+}
+
+
+void ActuatorLine::init(Parameter* para, GridProvider* gridProvider, CudaMemoryManager* cudaManager)
+{
+    this->initBladeRadii(cudaManager);
+    this->initBladeCoords(cudaManager);    
+    this->initBladeIndices(para, cudaManager);
+    this->initBladeVelocities(cudaManager);
+    this->initBladeForces(cudaManager);    
+    this->initBoundingSphere(para, cudaManager);
+}
+
+
+void ActuatorLine::interact(Parameter* para, CudaMemoryManager* cudaManager, int level, unsigned int t)
+{
+    if (level != this->level) return;
+    
+    cudaManager->cudaCopyBladeCoordsHtoD(this);
+
+    uint numberOfThreads = para->getParH(level)->numberofthreads;
+    vf::gpu::CudaGrid bladeGrid = vf::gpu::CudaGrid(numberOfThreads, this->numberOfNodes);
+
+    interpolateVelocities<<< bladeGrid.grid, bladeGrid.threads >>>(
+        para->getParD(this->level)->coordX_SP, para->getParD(this->level)->coordY_SP, para->getParD(this->level)->coordZ_SP,        
+        para->getParD(this->level)->neighborX_SP, para->getParD(this->level)->neighborY_SP, para->getParD(this->level)->neighborZ_SP, para->getParD(this->level)->neighborWSB_SP,
+        para->getParD(this->level)->vx_SP, para->getParD(this->level)->vy_SP, para->getParD(this->level)->vz_SP,
+        this->numberOfIndices,
+        this->bladeCoordsXD, this->bladeCoordsYD, this->bladeCoordsZD,  
+        this->bladeVelocitiesXD, this->bladeVelocitiesYD, this->bladeVelocitiesZD,  
+        this->bladeIndicesD, this->numberOfNodes);
+
+    cudaManager->cudaCopyBladeVelocitiesDtoH(this);
+
+    if(true)
+    {
+        this->calcForcesEllipticWing(para);
+    }
+
+    cudaManager->cudaCopyBladeForcesHtoD(this);
+
+    vf::gpu::CudaGrid sphereGrid = vf::gpu::CudaGrid(numberOfThreads, this->numberOfIndices);
+
+    applyBodyForces<<<sphereGrid.grid, sphereGrid.threads>>>(
+        para->getParD(this->level)->coordX_SP, para->getParD(this->level)->coordY_SP, para->getParD(this->level)->coordZ_SP,        
+        para->getParD(this->level)->forceX_SP, para->getParD(this->level)->forceY_SP, para->getParD(this->level)->forceZ_SP,        
+        this->boundingSphereIndicesD, this->numberOfIndices,
+        this->bladeCoordsXD, this->bladeCoordsYD, this->bladeCoordsZD,  
+        this->bladeForcesXD, this->bladeForcesYD, this->bladeForcesZD,
+        this->bladeRadiiD,
+        this->diameter*0.5f,  
+        this->nBlades, this->nBladeNodes,
+        this->epsilon, this->delta_x);
+
+    real dazimuth = this->omega*this->delta_t;
+
+    this->azimuth += dazimuth;
+    this->rotateBlades(dazimuth);
+}
+
+
+void ActuatorLine::free(Parameter* para, CudaMemoryManager* cudaManager)
+{
+    cudaManager->cudaFreeBladeRadii(this);
+    cudaManager->cudaFreeBladeCoords(this);
+    cudaManager->cudaFreeBladeVelocities(this);
+    cudaManager->cudaFreeBladeForces(this);
+    cudaManager->cudaFreeBladeIndices(this);
+    cudaManager->cudaFreeSphereIndices(this);
+}
+
+
+void ActuatorLine::calcForcesEllipticWing(Parameter* para)
+{
+    real localAzimuth;
+    uint node;
+    real uXYZ_X, uXYZ_Y, uXYZ_Z;
+    real uRTZ_X, uRTZ_Y, uRTZ_Z;
+    real fXYZ_X, fXYZ_Y, fXYZ_Z;
+    real fRTZ_X, fRTZ_Y, fRTZ_Z;
+    real r;
+    real u_rel, v_rel, u_rel_sq;
+    real phi;
+    real Cl = 1.f;
+    real Cd = 0.f;
+    real c0 = 1.f;
+
+    real c, Cn, Ct;
+
+    real forceRatio = this->density*pow(this->delta_x,4)*pow(this->delta_t,-2);
+
+    for( uint blade=0; blade<this->nBlades; blade++)
+    {
+        localAzimuth = this->azimuth+2*blade*vf::lbm::constant::cPi/this->nBlades;
+        for( uint bladeNode=0; bladeNode<this->nBladeNodes; bladeNode++)
+        {
+            node = bladeNode+blade*this->nBladeNodes;
+            uXYZ_X = this->bladeVelocitiesXH[node]*para->getVelocityRatio();
+            uXYZ_Y = this->bladeVelocitiesYH[node]*para->getVelocityRatio();
+            uXYZ_Z = this->bladeVelocitiesZH[node]*para->getVelocityRatio();
+
+            invRotateAboutX3D(localAzimuth, uXYZ_X, uXYZ_Y, uXYZ_Z, uRTZ_X, uRTZ_Y, uRTZ_Z);
+            r = this->bladeRadiiH[bladeNode];
+
+            u_rel = uRTZ_X;
+            v_rel = uRTZ_Y+this->omega*r;
+            u_rel_sq = u_rel*u_rel+v_rel*v_rel;
+            phi = atan2(u_rel, v_rel);
+
+            c = c0 * sqrt( 1.f- pow(4.f*r/this->diameter-1.f, 2.f) );
+            Cn =   Cl*cos(phi)+Cd*sin(phi);
+            Ct =  -Cl*sin(phi)+Cd*cos(phi);
+
+            fRTZ_X = 0.5f*u_rel_sq*c*this->density*Cn;
+            fRTZ_Y = 0.5f*u_rel_sq*c*this->density*Ct;
+            fRTZ_Z = 0.0;
+
+            rotateAboutX3D(localAzimuth, fRTZ_X, fRTZ_Y, fRTZ_Z, fXYZ_X, fXYZ_Y, fXYZ_Z);
+        
+            this->bladeForcesXH[node] = fXYZ_X/forceRatio;
+            this->bladeForcesYH[node] = fXYZ_Y/forceRatio;
+            this->bladeForcesZH[node] = fXYZ_Z/forceRatio;
+        }
+    }
+}
+
+void ActuatorLine::rotateBlades(real angle)
+{
+    for(uint node=0; node<this->nBladeNodes*this->nBlades; node++)
+    {
+        real oldCoordX = this->bladeCoordsXH[node];
+        real oldCoordY = this->bladeCoordsYH[node];
+        real oldCoordZ = this->bladeCoordsZH[node];
+
+        real newCoordX, newCoordY, newCoordZ;
+        rotateAboutX3D(angle, oldCoordX, oldCoordY, oldCoordZ, newCoordX, newCoordY, newCoordZ, this->turbinePosX, this->turbinePosY, this->turbinePosZ);
+        
+        this->bladeCoordsYH[node] = newCoordX;
+        this->bladeCoordsYH[node] = newCoordY;
+        this->bladeCoordsZH[node] = newCoordZ;
+    }
+}
+
+void ActuatorLine::initBladeRadii(CudaMemoryManager* cudaManager)
+{   
+    cudaManager->cudaAllocBladeRadii(this);
+
+    real dx = 0.5f*this->diameter/this->nBladeNodes;        
+    for(uint node=0; node<this->nBladeNodes; node++)
+    {
+        this->bladeRadiiH[node] = dx*(node+1);
+    }
+    cudaManager->cudaCopyBladeRadiiHtoD(this);
+}
+
+void ActuatorLine::initBladeCoords(CudaMemoryManager* cudaManager)
+{   
+    cudaManager->cudaAllocBladeCoords(this);
+
+    for( uint blade=0; blade<this->nBlades; blade++)
+    {
+        real localAzimuth = this->azimuth+(2*vf::lbm::constant::cPi/this->nBlades)*blade;
+        for(uint node=0; node<this->nBladeNodes; node++)
+        {
+            real coordX, coordY, coordZ;
+            real x,y,z;
+            x = 0.f;
+            y = 0.f;
+            z = this->bladeRadiiH[node];
+            rotateAboutX3D(localAzimuth, x, y, z, coordX, coordY, coordZ);
+            this->bladeCoordsXH[node+this->nBladeNodes*blade] = coordX+this->turbinePosX;
+            this->bladeCoordsYH[node+this->nBladeNodes*blade] = coordY+this->turbinePosY;
+            this->bladeCoordsZH[node+this->nBladeNodes*blade] = coordZ+this->turbinePosZ;
+        }
+    }
+    cudaManager->cudaCopyBladeCoordsHtoD(this);
+}
+
+void ActuatorLine::initBladeVelocities(CudaMemoryManager* cudaManager)
+{   
+    cudaManager->cudaAllocBladeVelocities(this);
+
+    for(uint node=0; node<this->numberOfNodes; node++)
+    {
+        this->bladeVelocitiesXH[node] = 0.f;
+        this->bladeVelocitiesYH[node] = 0.f;
+        this->bladeVelocitiesZH[node] = 0.f;
+    }
+    cudaManager->cudaCopyBladeVelocitiesHtoD(this);
+}
+
+void ActuatorLine::initBladeForces(CudaMemoryManager* cudaManager)
+{   
+    cudaManager->cudaAllocBladeForces(this);
+
+    for(uint node=0; node<this->numberOfNodes; node++)
+    {
+        this->bladeForcesXH[node] = 0.f;
+        this->bladeForcesYH[node] = 0.f;
+        this->bladeForcesZH[node] = 0.f;
+    }
+    cudaManager->cudaCopyBladeForcesHtoD(this);
+}
+
+void ActuatorLine::initBladeIndices(Parameter* para, CudaMemoryManager* cudaManager)
+{   
+    cudaManager->cudaAllocBladeIndices(this);
+
+    real* coordsX = para->getParH(this->level)->coordX_SP;
+    real* coordsY = para->getParH(this->level)->coordY_SP;
+    real* coordsZ = para->getParH(this->level)->coordZ_SP;
+
+    for(uint node=0; node<this->numberOfNodes; node++)
+    {
+        this->bladeIndicesH[node] = findNearestCellBSW(1, coordsX, coordsY, coordsZ, 
+                                                       this->bladeCoordsXH[node], this->bladeCoordsYH[node], this->bladeCoordsZH[node],
+                                                       para->getParH(this->level)->neighborX_SP, para->getParH(this->level)->neighborY_SP, para->getParH(this->level)->neighborZ_SP,
+                                                       para->getParH(this->level)->neighborWSB_SP);
+        
+    }
+    cudaManager->cudaCopyBladeIndicesHtoD(this);
+}
+
+void ActuatorLine::initBoundingSphere(Parameter* para, CudaMemoryManager* cudaManager)
+{
+    // Actuator line exists only on 1 level
+    std::vector<int> nodesInSphere;
+
+    for (uint j = 1; j <= para->getParH(this->level)->size_Mat_SP; j++)
+    {
+        const real coordX = para->getParH(this->level)->coordX_SP[j];
+        const real coordY = para->getParH(this->level)->coordY_SP[j];
+        const real coordZ = para->getParH(this->level)->coordZ_SP[j];
+        const real dist = sqrt(pow(coordX-this->turbinePosX,2)+pow(coordY-this->turbinePosY,2)+pow(coordZ-this->turbinePosZ,2));
+        
+        if(dist < 0.6*this->diameter) nodesInSphere.push_back(j);
+    }
+
+    this->numberOfIndices = uint(nodesInSphere.size());
+    cudaManager->cudaAllocSphereIndices(this);
+    std::copy(nodesInSphere.begin(), nodesInSphere.end(), this->boundingSphereIndicesH);
+    cudaManager->cudaCopySphereIndicesHtoD(this);
+}
\ No newline at end of file
diff --git a/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/ActuatorLine.h b/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/ActuatorLine.h
new file mode 100644
index 0000000000000000000000000000000000000000..f0de6c8e01b2f4cfc0421f02b1a75308512e34e8
--- /dev/null
+++ b/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/ActuatorLine.h
@@ -0,0 +1,109 @@
+#ifndef ActuatorLine_H
+#define ActuatorLine_H
+
+#include "PreCollisionInteractor.h"
+#include "PointerDefinitions.h"
+
+class Parameter;
+class GridProvider;
+
+class ActuatorLine : public PreCollisionInteractor
+{
+public:
+    ActuatorLine(
+        const uint _nBlades,
+        const real _density,
+        const uint _nBladeNodes,
+        const real _epsilon,
+        real _turbinePosX, real _turbinePosY, real _turbinePosZ,
+        const real _diameter,
+        int _level,
+        const real _delta_t,
+        const real _delta_x
+    ) : nBlades(_nBlades),
+        density(_density),
+        nBladeNodes(_nBladeNodes), 
+        epsilon(_epsilon),
+        turbinePosX(_turbinePosX), turbinePosY(_turbinePosY), turbinePosZ(_turbinePosZ),
+        diameter(_diameter),
+        level(_level),
+        delta_x(_delta_x),
+        PreCollisionInteractor()
+    {
+        this->delta_t = _delta_t/pow(2,this->level);
+        this->delta_x = _delta_x/pow(2,this->level);
+        this->numberOfNodes = this->nBladeNodes*this->nBlades;
+        this->omega = 1.0f;
+        this->azimuth = 0.0f;
+
+    }
+
+    virtual  ~ActuatorLine()
+    {
+        
+    }
+
+    void init(Parameter* para, GridProvider* gridProvider, CudaMemoryManager* cudaManager);
+    void interact(Parameter* para, CudaMemoryManager* cudaManager, int level, uint t);
+    void free(Parameter* para, CudaMemoryManager* cudaManager);
+    void write(uint t);
+
+    uint getNBladeNodes(){return this->nBladeNodes;};
+    uint getNBlades(){return this->nBlades;};
+    uint getNumberOfIndices(){return this->numberOfIndices;};
+    uint getNumberOfNodes(){return this->numberOfNodes;};
+    real* getBladeCoordsX(){return this->bladeCoordsXH;};
+    real* getBladeCoordsY(){return this->bladeCoordsYH;};
+    real* getBladeCoordsZ(){return this->bladeCoordsZH;};
+    real* getBladeVelocitiesX(){return this->bladeVelocitiesXH;};
+    real* getBladeVelocitiesY(){return this->bladeVelocitiesYH;};
+    real* getBladeVelocitiesZ(){return this->bladeVelocitiesZH;};
+    real* getBladeForcesX(){return this->bladeForcesXH;};
+    real* getBladeForcesY(){return this->bladeForcesYH;};
+    real* getBladeForcesZ(){return this->bladeForcesZH;};
+
+private:
+    void initBoundingSphere(Parameter* para, CudaMemoryManager* cudaManager);
+
+    void initBladeRadii(CudaMemoryManager* cudaManager);
+    void initBladeCoords(CudaMemoryManager* cudaManager);
+    void initBladeVelocities(CudaMemoryManager* cudaManager);
+    void initBladeForces(CudaMemoryManager* cudaManager);
+    void initBladeIndices(Parameter* para, CudaMemoryManager* cudaManager);
+
+    void calcForcesEllipticWing(Parameter* para);
+    void rotateBlades(real angle);
+
+    void writeBladeCoords(uint t);
+    void writeBladeForces(uint t);
+    void writeBladeVelocities(uint t);
+    
+
+public:
+    real* bladeRadiiH;
+    real* bladeRadiiD;
+    real* bladeCoordsXH, * bladeCoordsYH, * bladeCoordsZH;
+    real* bladeCoordsXD, * bladeCoordsYD, * bladeCoordsZD;
+    real* bladeVelocitiesXH, * bladeVelocitiesYH, * bladeVelocitiesZH;
+    real* bladeVelocitiesXD, * bladeVelocitiesYD, * bladeVelocitiesZD;
+    real* bladeForcesXH, * bladeForcesYH, * bladeForcesZH;
+    real* bladeForcesXD, * bladeForcesYD, * bladeForcesZD;
+    uint* bladeIndicesH;
+    uint* bladeIndicesD; 
+    uint* boundingSphereIndicesH;
+    uint* boundingSphereIndicesD;
+    
+private:
+    const real density;
+    real turbinePosX, turbinePosY, turbinePosZ;
+    real omega, azimuth, delta_t, delta_x;
+    const real diameter;
+    const uint nBladeNodes;
+    const uint nBlades;
+    const real epsilon; // in m
+    const int level;
+    uint numberOfIndices;
+    uint numberOfNodes;
+    };
+
+#endif
\ No newline at end of file
diff --git a/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/PreCollisionInteractor.h b/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/PreCollisionInteractor.h
new file mode 100644
index 0000000000000000000000000000000000000000..78b4d5e9ba148651e78c38758624de69dd08c47d
--- /dev/null
+++ b/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/PreCollisionInteractor.h
@@ -0,0 +1,41 @@
+#ifndef PreCollisionInteractor_H
+#define PreCollisionInteractor_H
+
+#include <string>
+#include <vector>
+
+#include "Core/DataTypes.h"
+#include "PointerDefinitions.h"
+#include "VirtualFluids_GPU_export.h"
+
+#include <cassert>
+
+class Parameter;
+class GridProvider;
+class CudaMemoryManager;
+
+class VIRTUALFLUIDS_GPU_EXPORT PreCollisionInteractor
+{
+private:
+    SPtr<Parameter> para;
+
+protected:
+    PreCollisionInteractor()
+    {
+        this->updateInterval = 1;
+    }
+
+public:
+    virtual ~PreCollisionInteractor()
+    {
+    }
+
+    virtual void init(Parameter *para, GridProvider *gridProvider, CudaMemoryManager *cudaManager) = 0;
+    virtual void interact(Parameter *para, CudaMemoryManager *cudaManager, int level, uint t) = 0;
+    virtual void free(Parameter *para, CudaMemoryManager *cudaManager) = 0;
+
+protected:
+    uint updateInterval;
+};
+
+#endif
\ No newline at end of file
diff --git a/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/PlaneProbe.cu b/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/PlaneProbe.cu
new file mode 100644
index 0000000000000000000000000000000000000000..5a90af053968d773df2623c88a8728299015d4c8
--- /dev/null
+++ b/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/PlaneProbe.cu
@@ -0,0 +1,51 @@
+#include "PlaneProbe.h"
+
+#include "Kernel/Utilities/CudaGrid.h"
+
+#include <cuda.h>
+#include <cuda_runtime.h>
+#include <helper_cuda.h>
+
+#include "Parameter/Parameter.h"
+#include "DataStructureInitializer/GridProvider.h"
+#include "GPU/CudaMemoryManager.h"
+
+void PlaneProbe::findPoints(Parameter* para, GridProvider* gridProvider, std::vector<int>& probeIndices_level,
+                            std::vector<real>& distX_level, std::vector<real>& distY_level, std::vector<real>& distZ_level,      
+                            std::vector<real>& pointCoordsX_level, std::vector<real>& pointCoordsY_level, std::vector<real>& pointCoordsZ_level,
+                            int level)
+{
+    real dx = abs(para->getParH(level)->coordX_SP[1]-para->getParH(level)->coordX_SP[para->getParH(level)->neighborX_SP[1]]);
+    for(uint j=1; j<para->getParH(level)->size_Mat_SP; j++ )
+    {
+        real pointCoordX = para->getParH(level)->coordX_SP[j];
+        real pointCoordY = para->getParH(level)->coordY_SP[j];
+        real pointCoordZ = para->getParH(level)->coordZ_SP[j];
+        real distX = pointCoordX - this->posX;
+        real distY = pointCoordY - this->posY;
+        real distZ = pointCoordZ - this->posZ;
+
+        if( distX <= this->deltaX && distY <= this->deltaY && distZ <= this->deltaZ &&
+            distX >=0.f && distY >=0.f && distZ >=0.f)
+        {
+            probeIndices_level.push_back(j);
+            distX_level.push_back( distX/dx );
+            distY_level.push_back( distY/dx );
+            distZ_level.push_back( distZ/dx );
+            pointCoordsX_level.push_back( pointCoordX );
+            pointCoordsY_level.push_back( pointCoordY );
+            pointCoordsZ_level.push_back( pointCoordZ );
+        }
+    }
+}
+
+void PlaneProbe::calculateQuantities(SPtr<ProbeStruct> probeStruct, Parameter* para, int level)
+{
+    vf::gpu::CudaGrid grid = vf::gpu::CudaGrid(para->getParH(level)->numberofthreads, probeStruct->nPoints);
+    interpQuantities<<<grid.grid, grid.threads>>>(  probeStruct->pointIndicesD, probeStruct->nPoints, probeStruct->vals,
+                                                    probeStruct->distXD, probeStruct->distYD, probeStruct->distZD,
+                                                    para->getParD(level)->vx_SP, para->getParD(level)->vy_SP, para->getParD(level)->vz_SP, para->getParD(level)->rho_SP, 
+                                                    para->getParD(level)->neighborX_SP, para->getParD(level)->neighborY_SP, para->getParD(level)->neighborZ_SP, 
+                                                    probeStruct->quantitiesD, probeStruct->arrayOffsetsD, probeStruct->quantitiesArrayD, false);
+
+}
\ No newline at end of file
diff --git a/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/PlaneProbe.h b/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/PlaneProbe.h
new file mode 100644
index 0000000000000000000000000000000000000000..6f6af68fc2e4b5d93502d9890a29cc4838ac6042
--- /dev/null
+++ b/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/PlaneProbe.h
@@ -0,0 +1,42 @@
+#ifndef PlaneProbe_H
+#define PlaneProbe_H
+
+#include "Probe.h"
+
+class PlaneProbe : public Probe
+{
+public: 
+    PlaneProbe(
+        const std::string _probeName,
+        uint _tStartAvg,
+        uint _tStartOut,
+        uint _tOut
+    ): Probe(_probeName, 
+             _tStartAvg, 
+             _tStartOut, 
+             _tOut)
+    {}
+
+    void setProbePlane(real _posX, real _posY, real _posZ, real _deltaX, real _deltaY, real _deltaZ)
+    {
+        this->posX = _posX; 
+        this->posY = _posY; 
+        this->posZ = _posZ;         
+        this->deltaX = _deltaX; 
+        this->deltaY = _deltaY; 
+        this->deltaZ = _deltaZ; 
+    }
+
+private:
+    void findPoints(Parameter* para, GridProvider* gridProvider, std::vector<int>& probeIndices_level,
+                    std::vector<real>& distX_level, std::vector<real>& distY_level, std::vector<real>& distZ_level,      
+                    std::vector<real>& pointCoordsX_level, std::vector<real>& pointCoordsY_level, std::vector<real>& pointCoordsZ_level,
+                    int level) override;
+    void calculateQuantities(SPtr<ProbeStruct> probeStruct, Parameter* para, int level) override;
+
+private:
+    real posX, posY, posZ;
+    real deltaX, deltaY, deltaZ;
+};
+
+#endif
\ No newline at end of file
diff --git a/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/PointProbe.cu b/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/PointProbe.cu
new file mode 100644
index 0000000000000000000000000000000000000000..bf8875c29252759bef9e557a173559ae035b4e94
--- /dev/null
+++ b/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/PointProbe.cu
@@ -0,0 +1,86 @@
+#include "PointProbe.h"
+
+#include <cuda.h>
+#include <cuda_runtime.h>
+#include <helper_cuda.h>
+
+#include "Kernel/Utilities/CudaGrid.h"
+
+#include "Parameter/Parameter.h"
+#include "DataStructureInitializer/GridProvider.h"
+#include "GPU/CudaMemoryManager.h"
+
+void PointProbe::findPoints(Parameter* para, GridProvider* gridProvider, std::vector<int>& probeIndices_level,
+                       std::vector<real>& distX_level, std::vector<real>& distY_level, std::vector<real>& distZ_level,      
+                       std::vector<real>& pointCoordsX_level, std::vector<real>& pointCoordsY_level, std::vector<real>& pointCoordsZ_level,
+                       int level)
+{
+
+    real dx = abs(para->getParH(level)->coordX_SP[1]-para->getParH(level)->coordX_SP[para->getParH(level)->neighborX_SP[1]]);
+    for(uint j=1; j<para->getParH(level)->size_Mat_SP; j++ )
+    {    
+        for(uint point=0; point<this->pointCoordsX.size(); point++)
+        {
+            real pointCoordX = this->pointCoordsX[point];
+            real pointCoordY = this->pointCoordsY[point];
+            real pointCoordZ = this->pointCoordsZ[point];
+            real distX = pointCoordX-para->getParH(level)->coordX_SP[j];
+            real distY = pointCoordY-para->getParH(level)->coordY_SP[j];
+            real distZ = pointCoordZ-para->getParH(level)->coordZ_SP[j];
+            if( distX <=dx && distY <=dx && distZ <=dx &&
+                distX >0.f && distY >0.f && distZ >0.f)
+            {
+                probeIndices_level.push_back(j);
+                distX_level.push_back( distX/dx );
+                distY_level.push_back( distY/dx );
+                distZ_level.push_back( distZ/dx );
+                pointCoordsX_level.push_back( pointCoordX );
+                pointCoordsY_level.push_back( pointCoordY );
+                pointCoordsZ_level.push_back( pointCoordZ );
+            }
+        }
+    }
+}
+
+void PointProbe::calculateQuantities(SPtr<ProbeStruct> probeStruct, Parameter* para, int level)
+{
+    vf::gpu::CudaGrid grid = vf::gpu::CudaGrid(para->getParH(level)->numberofthreads, probeStruct->nPoints);
+
+    interpQuantities<<<grid.grid, grid.threads>>>(  probeStruct->pointIndicesD, probeStruct->nPoints, probeStruct->vals,
+                                                    probeStruct->distXD, probeStruct->distYD, probeStruct->distZD,
+                                                    para->getParD(level)->vx_SP, para->getParD(level)->vy_SP, para->getParD(level)->vz_SP, para->getParD(level)->rho_SP, 
+                                                    para->getParD(level)->neighborX_SP, para->getParD(level)->neighborY_SP, para->getParD(level)->neighborZ_SP, 
+                                                    probeStruct->quantitiesD, probeStruct->arrayOffsetsD, probeStruct->quantitiesArrayD, true);
+}
+
+void PointProbe::addProbePointsFromList(std::vector<real>& _pointCoordsX, std::vector<real>& _pointCoordsY, std::vector<real>& _pointCoordsZ)
+{
+    bool isSameLength = ( (_pointCoordsX.size()==_pointCoordsY.size()) && (_pointCoordsY.size()==_pointCoordsZ.size()));
+    assert("Probe: point lists have different lengths" && isSameLength);
+    this->pointCoordsX.insert(this->pointCoordsX.end(), _pointCoordsX.begin(),  _pointCoordsX.end());
+    this->pointCoordsY.insert(this->pointCoordsY.end(), _pointCoordsY.begin(),  _pointCoordsY.end());
+    this->pointCoordsZ.insert(this->pointCoordsZ.end(), _pointCoordsZ.begin(),  _pointCoordsZ.end());
+    printf("Added list of %u  points \n", uint(_pointCoordsX.size()) );
+}
+
+void PointProbe::addProbePointsFromXNormalPlane(real pos_x, real pos0_y, real pos0_z, real pos1_y, real pos1_z, uint n_y, uint n_z)
+{
+    int delta_y = (pos1_y-pos0_y)/(n_y-1);
+    int delta_z = (pos1_z-pos0_z)/(n_z-1);
+
+    this->pointCoordsX.reserve(this->pointCoordsX.size()+n_y*n_z);
+    this->pointCoordsY.reserve(this->pointCoordsY.size()+n_y*n_z);
+    this->pointCoordsZ.reserve(this->pointCoordsZ.size()+n_y*n_z);
+
+    for(int n_y=0; n_y<n_y; n_y++)
+    {
+        for(int n_z=0; n_z<n_z; n_z++)
+        {
+            this->pointCoordsX.push_back(pos_x);
+            this->pointCoordsY.push_back(pos0_y+delta_y*n_y);
+            this->pointCoordsZ.push_back(pos0_z+delta_z*n_z);
+        }
+    }
+    printf("Added %u  points \n",  n_y*n_z);
+
+}
\ No newline at end of file
diff --git a/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/PointProbe.h b/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/PointProbe.h
new file mode 100644
index 0000000000000000000000000000000000000000..917d2cf8c4f4606fc297491782bbc92dc621cf0e
--- /dev/null
+++ b/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/PointProbe.h
@@ -0,0 +1,36 @@
+#ifndef PointProbe_H
+#define PointProbe_H
+
+#include "Probe.h"
+
+class PointProbe: public Probe
+{
+public:
+    PointProbe(
+        const std::string _probeName,
+        uint _tStartAvg,
+        uint _tStartOut,
+        uint _tOut
+    ): Probe(_probeName, 
+             _tStartAvg, 
+             _tStartOut, 
+             _tOut)
+    {}
+
+    void addProbePointsFromList(std::vector<real>& _pointCoordsX, std::vector<real>& _pointCoordsY, std::vector<real>& _pointCoordsZ);
+    void addProbePointsFromXNormalPlane(real pos_x, real pos0_y, real pos0_z, real pos1_y, real pos1_z, uint n_y, uint n_z);
+    
+private:
+    void findPoints(Parameter* para, GridProvider* gridProvider, std::vector<int>& probeIndices_level,
+                    std::vector<real>& distX_level, std::vector<real>& distY_level, std::vector<real>& distZ_level,      
+                    std::vector<real>& pointCoordsX_level, std::vector<real>& pointCoordsY_level, std::vector<real>& pointCoordsZ_level,
+                    int level) override;
+
+    void calculateQuantities(SPtr<ProbeStruct> probeStruct, Parameter* para, int level) override;
+
+private:
+    std::vector<real> pointCoordsX, pointCoordsY, pointCoordsZ; 
+
+};
+
+#endif
\ No newline at end of file
diff --git a/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/Probe.cu b/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/Probe.cu
new file mode 100644
index 0000000000000000000000000000000000000000..e798f433270777f8d718218e91306f980b20e9ca
--- /dev/null
+++ b/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/Probe.cu
@@ -0,0 +1,408 @@
+#include "Probe.h"
+
+#include <cuda.h>
+#include <cuda_runtime.h>
+#include <helper_cuda.h>
+
+#include "VirtualFluids_GPU/GPU/GeometryUtils.h"
+#include "basics/writer/WbWriterVtkXmlBinary.h"
+#include <Core/StringUtilities/StringUtil.h>
+
+#include "Parameter/Parameter.h"
+#include "DataStructureInitializer/GridProvider.h"
+#include "GPU/CudaMemoryManager.h"
+
+
+std::vector<std::string> getPostProcessingVariableNames(PostProcessingVariable variable)
+{
+    std::vector<std::string> varNames;
+    switch (variable)
+    {
+    case PostProcessingVariable::Means:
+        varNames.push_back("vx_mean");
+        varNames.push_back("vy_mean");
+        varNames.push_back("vz_mean");
+        varNames.push_back("rho_mean");
+        break;
+    case PostProcessingVariable::Variances:
+        varNames.push_back("vx_var");
+        varNames.push_back("vy_var");
+        varNames.push_back("vz_var");
+        varNames.push_back("rho_var");
+        break;
+    default:
+        break;
+    }
+    return varNames;
+}
+
+__device__ void calculateQuantities(uint n, real* quantityArray, bool* quantities, uint* quantityArrayOffsets, uint nPoints, uint node, real vx, real vy, real vz, real rho)
+{
+    //"https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Welford's_online_algorithm"
+    // also has extensions for higher order and covariances
+    real inv_n = 1/real(n);
+
+    if(quantities[int(PostProcessingVariable::Means)])
+    {
+        
+        uint arrOff = quantityArrayOffsets[int(PostProcessingVariable::Means)];
+        real vx_m_old  = quantityArray[(arrOff+0)*nPoints+node];
+        real vy_m_old  = quantityArray[(arrOff+1)*nPoints+node];
+        real vz_m_old  = quantityArray[(arrOff+2)*nPoints+node];
+        real rho_m_old = quantityArray[(arrOff+3)*nPoints+node];
+
+        real vx_m_new  = ( (n-1)*vx_m_old + vx  )*inv_n;
+        real vy_m_new  = ( (n-1)*vy_m_old + vy  )*inv_n;
+        real vz_m_new  = ( (n-1)*vz_m_old + vz  )*inv_n;
+        real rho_m_new = ( (n-1)*rho_m_old+ rho )*inv_n;
+
+        quantityArray[(arrOff+0)*nPoints+node] = vx_m_new;
+        quantityArray[(arrOff+1)*nPoints+node] = vy_m_new;
+        quantityArray[(arrOff+2)*nPoints+node] = vz_m_new;
+        quantityArray[(arrOff+3)*nPoints+node] = rho_m_new;
+    
+        if(quantities[int(PostProcessingVariable::Variances)])
+        {
+            arrOff = quantityArrayOffsets[int(PostProcessingVariable::Variances)];
+
+            real vx_var_old  = quantityArray[(arrOff+0)*nPoints+node];
+            real vy_var_old  = quantityArray[(arrOff+1)*nPoints+node];
+            real vz_var_old  = quantityArray[(arrOff+2)*nPoints+node];
+            real rho_var_old = quantityArray[(arrOff+3)*nPoints+node];
+
+            real vx_var_new  = ( (n-1)*(vx_var_old )+(vx  - vx_m_old )*(vx  - vx_m_new ) )*inv_n;
+            real vy_var_new  = ( (n-1)*(vy_var_old )+(vy  - vy_m_old )*(vy  - vy_m_new ) )*inv_n;
+            real vz_var_new  = ( (n-1)*(vz_var_old )+(vz  - vz_m_old )*(vz  - vz_m_new ) )*inv_n;
+            real rho_var_new = ( (n-1)*(rho_var_old)+(rho - rho_m_old)*(rho - rho_m_new) )*inv_n;
+
+            quantityArray[(arrOff+0)*nPoints+node] = vx_var_new;
+            quantityArray[(arrOff+1)*nPoints+node] = vy_var_new;
+            quantityArray[(arrOff+2)*nPoints+node] = vz_var_new;
+            quantityArray[(arrOff+3)*nPoints+node] = rho_var_new; 
+        }
+    }
+}
+
+__global__ void interpQuantities(   uint* pointIndices,
+                                    uint nPoints, uint n,
+                                    real* distX, real* distY, real* distZ,
+                                    real* vx, real* vy, real* vz, real* rho,            
+                                    uint* neighborX, uint* neighborY, uint* neighborZ,
+                                    bool* quantities,
+                                    uint* quantityArrayOffsets, real* quantityArray,
+                                    bool interpolate
+                                )
+{
+    const uint x = threadIdx.x; 
+    const uint y = blockIdx.x;
+    const uint z = blockIdx.y;
+
+    const uint nx = blockDim.x;
+    const uint ny = gridDim.x;
+
+    const uint node = nx*(ny*z + y) + x;
+
+    if(node>=nPoints) return;
+
+    // Get indices of neighbor nodes. 
+    // node referring to BSW cell as seen from probe point
+    uint k = pointIndices[node];
+    real u_interpX, u_interpY, u_interpZ, rho_interp;
+
+    if(interpolate)
+    {
+        uint ke, kn, kt, kne, kte, ktn, ktne;
+        getNeighborIndicesOfBSW(  k, ke, kn, kt, kne, kte, ktn, ktne, neighborX, neighborY, neighborZ);
+
+        // Trilinear interpolation of macroscopic quantities to probe point
+        real dW, dE, dN, dS, dT, dB;
+        getInterpolationWeights(dW, dE, dN, dS, dT, dB, distX[node], distY[node], distZ[node]);
+
+
+        u_interpX  = trilinearInterpolation( dW, dE, dN, dS, dT, dB, k, ke, kn, kt, kne, kte, ktn, ktne, vx );
+        u_interpY  = trilinearInterpolation( dW, dE, dN, dS, dT, dB, k, ke, kn, kt, kne, kte, ktn, ktne, vy );
+        u_interpZ  = trilinearInterpolation( dW, dE, dN, dS, dT, dB, k, ke, kn, kt, kne, kte, ktn, ktne, vz );
+        rho_interp = trilinearInterpolation( dW, dE, dN, dS, dT, dB, k, ke, kn, kt, kne, kte, ktn, ktne, rho );
+    }
+    else
+    {
+        u_interpX = vx[k];
+        u_interpY = vy[k];
+        u_interpZ = vz[k];
+        rho_interp = rho[k];
+    }
+
+    calculateQuantities(n, quantityArray, quantities, quantityArrayOffsets, nPoints, node, u_interpX, u_interpY, u_interpZ, rho_interp);
+
+}
+
+
+void Probe::init(Parameter* para, GridProvider* gridProvider, CudaMemoryManager* cudaManager)
+{
+
+    probeParams.resize(para->getMaxLevel()+1);
+
+    for(int level=0; level<=para->getMaxLevel(); level++)
+    {
+        std::vector<int> probeIndices_level;
+        std::vector<real> distX_level;
+        std::vector<real> distY_level;
+        std::vector<real> distZ_level;        
+        std::vector<real> pointCoordsX_level;
+        std::vector<real> pointCoordsY_level;
+        std::vector<real> pointCoordsZ_level;
+
+        this->findPoints(para, gridProvider, probeIndices_level, distX_level, distY_level, distZ_level,      
+                       pointCoordsX_level, pointCoordsY_level, pointCoordsZ_level,
+                       level);
+        
+        this->addProbeStruct(cudaManager, probeIndices_level, 
+                            distX_level, distY_level, distZ_level, 
+                            pointCoordsX_level, pointCoordsX_level, pointCoordsX_level, 
+                            level);
+    }
+}
+
+
+
+void Probe::addProbeStruct(CudaMemoryManager* cudaManager, std::vector<int>& probeIndices,
+                                      std::vector<real>& distX, std::vector<real>& distY, std::vector<real>& distZ,   
+                                      std::vector<real>& pointCoordsX, std::vector<real>& pointCoordsY, std::vector<real>& pointCoordsZ,
+                                      int level)
+{
+    probeParams[level] = SPtr<ProbeStruct>(new ProbeStruct);
+    probeParams[level]->vals = 1;
+    probeParams[level]->nPoints = uint(probeIndices.size());
+
+    probeParams[level]->pointCoordsX = (real*)malloc(probeParams[level]->nPoints*sizeof(real));
+    probeParams[level]->pointCoordsY = (real*)malloc(probeParams[level]->nPoints*sizeof(real));
+    probeParams[level]->pointCoordsZ = (real*)malloc(probeParams[level]->nPoints*sizeof(real));
+
+    std::copy(pointCoordsX.begin(), pointCoordsX.end(), probeParams[level]->pointCoordsX);
+    std::copy(pointCoordsY.begin(), pointCoordsY.end(), probeParams[level]->pointCoordsY);
+    std::copy(pointCoordsZ.begin(), pointCoordsZ.end(), probeParams[level]->pointCoordsZ);
+
+    // Might have to catch nPoints=0 ?!?!
+    cudaManager->cudaAllocProbeDistances(this, level);
+    cudaManager->cudaAllocProbeIndices(this, level);
+
+    std::copy(distX.begin(), distX.end(), probeParams[level]->distXH);
+    std::copy(distY.begin(), distY.end(), probeParams[level]->distYH);
+    std::copy(distZ.begin(), distZ.end(), probeParams[level]->distZH);
+    std::copy(probeIndices.begin(), probeIndices.end(), probeParams[level]->pointIndicesH);
+
+    cudaManager->cudaCopyProbeDistancesHtoD(this, level);
+    cudaManager->cudaCopyProbeIndicesHtoD(this, level);
+
+    uint arrOffset = 0;
+
+    cudaManager->cudaAllocProbeQuantitiesAndOffsets(this, level);
+
+    for( int var=0; var<int(PostProcessingVariable::LAST); var++){
+    if(this->quantities[var])
+    {
+
+        probeParams[level]->quantitiesH[var] = true;
+        probeParams[level]->arrayOffsetsH[var] = arrOffset;
+        arrOffset += uint(getPostProcessingVariableNames(static_cast<PostProcessingVariable>(var)).size());
+    }}
+    
+    cudaManager->cudaCopyProbeQuantitiesAndOffsetsHtoD(this, level);
+
+    probeParams[level]->nArrays = arrOffset;
+
+    cudaManager->cudaAllocProbeQuantityArray(this, level);
+
+    for(uint arr=0; arr<probeParams[level]->nArrays; arr++)
+    {
+        for( uint point=0; point<probeParams[level]->nPoints; point++)
+        {
+            probeParams[level]->quantitiesArrayH[arr*probeParams[level]->nPoints+point] = 0.0f;
+        }
+    }
+    cudaManager->cudaCopyProbeQuantityArrayHtoD(this, level);
+}
+
+
+void Probe::interact(Parameter* para, CudaMemoryManager* cudaManager, int level, uint t)
+{
+
+    if(t>this->tStartAvg)
+    {
+        SPtr<ProbeStruct> probeStruct = this->getProbeStruct(level);
+
+        this->calculateQuantities(probeStruct, para, level);
+        probeStruct->vals++;
+
+        if(max(int(t) - int(this->tStartOut), -1) % this->tOut == 0)
+        {
+            cudaManager->cudaCopyProbeQuantityArrayDtoH(this, level);
+
+            this->write(para, level, t);
+        }
+
+    }
+}
+
+void Probe::free(Parameter* para, CudaMemoryManager* cudaManager)
+{
+    for(int level=0; level<=para->getMaxLevel(); level++)
+    {
+        cudaManager->cudaFreeProbeDistances(this, level);
+        cudaManager->cudaFreeProbeIndices(this, level);
+        cudaManager->cudaFreeProbeQuantityArray(this, level);
+        cudaManager->cudaFreeProbeQuantitiesAndOffsets(this, level);
+    }
+}
+
+
+
+void Probe::addPostProcessingVariable(PostProcessingVariable variable)
+{
+    this->quantities[int(variable)] = true;
+    switch(variable)
+    {
+        case PostProcessingVariable::Variances: 
+            this->addPostProcessingVariable(PostProcessingVariable::Means); break;
+        default: break;
+    }
+}
+
+void Probe::write(Parameter* para, int level, int t)
+{
+    const uint numberOfParts = this->getProbeStruct(level)->nPoints / para->getlimitOfNodesForVTK() + 1;
+
+    std::vector<std::string> fnames;
+    for (uint i = 1; i <= numberOfParts; i++)
+	{
+        std::string fname = this->probeName + "_bin_lev_" + StringUtil::toString<int>(level)
+                                         + "_ID_" + StringUtil::toString<int>(para->getMyID())
+                                         + "_Part_" + StringUtil::toString<int>(i) 
+                                         + "_t_" + StringUtil::toString<int>(t) 
+                                         + ".vtk";
+		fnames.push_back(fname);
+        this->fileNamesForCollectionFile.push_back(fname);
+    }
+    this->writeGridFiles(para, level, fnames, t);
+
+    if(level == 0) this->writeCollectionFile(para, t);
+}
+
+void Probe::writeCollectionFile(Parameter* para, int t)
+{
+    std::string filename = this->probeName + "_bin_ID_" + StringUtil::toString<int>(para->getMyID()) 
+                                           + "_t_" + StringUtil::toString<int>(t) 
+                                           + ".vtk";
+
+    std::ofstream file;
+
+    file.open( filename + ".pvtu" );
+
+    //////////////////////////////////////////////////////////////////////////
+    
+    file << "<VTKFile type=\"PUnstructuredGrid\" version=\"1.0\" byte_order=\"LittleEndian\" header_type=\"UInt64\">" << std::endl;
+    file << "  <PUnstructuredGrid GhostLevel=\"1\">" << std::endl;
+
+    file << "    <PPointData>" << std::endl;
+
+    for(std::string varName: this->getVarNames())
+    {
+        file << "       <DataArray type=\"Float64\" Name=\""<< varName << "\" /> " << std::endl;
+    }
+    file << "    </PPointData>" << std::endl;
+
+    file << "    <PPoints>" << std::endl;
+    file << "      <PDataArray type=\"Float32\" Name=\"Points\" NumberOfComponents=\"3\"/>" << std::endl;
+    file << "    </PPoints>" << std::endl;
+
+    for( auto& fname : this->fileNamesForCollectionFile )
+    {
+        const auto filenameWithoutPath=fname.substr( fname.find_last_of('/') + 1 );
+        file << "    <Piece Source=\"" << filenameWithoutPath << ".bin.vtu\"/>" << std::endl;
+    }
+
+    file << "  </PUnstructuredGrid>" << std::endl;
+    file << "</VTKFile>" << std::endl;
+
+    //////////////////////////////////////////////////////////////////////////
+
+    file.close();
+
+    this->fileNamesForCollectionFile.clear();
+}
+
+void Probe::writeGridFiles(Parameter* para, int level, std::vector<std::string>& fnames, int t)
+{
+    std::vector< UbTupleFloat3 > nodes;
+    std::vector< std::string > nodedatanames = this->getVarNames();
+
+    uint startpos = 0;
+    uint endpos = 0;
+    uint sizeOfNodes = 0;
+    std::vector< std::vector< double > > nodedata(nodedatanames.size());
+
+    SPtr<ProbeStruct> probeStruct = this->getProbeStruct(level);
+
+    for (uint part = 0; part < fnames.size(); part++)
+    {        
+        startpos = part * para->getlimitOfNodesForVTK();
+        sizeOfNodes = min(para->getlimitOfNodesForVTK(), probeStruct->nPoints - startpos);
+        endpos = startpos + sizeOfNodes;
+
+        //////////////////////////////////////////////////////////////////////////
+        nodes.resize(sizeOfNodes);
+
+        for (uint pos = startpos; pos < endpos; pos++)
+        {
+            nodes[pos-startpos] = makeUbTuple(  float(probeStruct->pointCoordsX[pos]),
+                                                float(probeStruct->pointCoordsY[pos]),
+                                                float(probeStruct->pointCoordsZ[pos]));
+        }
+
+        for( auto it=nodedata.begin(); it!=nodedata.end(); it++) it->resize(sizeOfNodes);
+
+        for( int var=0; var < int(PostProcessingVariable::LAST); var++){
+        if(this->quantities[var])
+        {
+            PostProcessingVariable quantity = static_cast<PostProcessingVariable>(var);
+            real coeff;
+            uint n_arrs = uint(getPostProcessingVariableNames(quantity).size());
+
+            switch(quantity)
+            {
+            case PostProcessingVariable::Means:
+                coeff = para->getVelocityRatio();
+            break;
+            case PostProcessingVariable::Variances:
+                coeff = pow(para->getVelocityRatio(),2);
+            break;
+            default: break;
+            }
+
+            uint arrOff = probeStruct->arrayOffsetsH[var];
+            uint arrLen = probeStruct->nPoints;
+
+            for(uint arr=0; arr<n_arrs; arr++)
+            {
+                for (uint pos = startpos; pos < endpos; pos++)
+                {
+                    nodedata[arrOff+arr][pos-startpos] = double(probeStruct->quantitiesArrayH[(arrOff+arr)*arrLen+pos]*coeff);
+                }
+            }
+        }}
+        WbWriterVtkXmlBinary::getInstance()->writeNodesWithNodeData(fnames[part], nodes, nodedatanames, nodedata);
+    }
+}
+
+std::vector<std::string> Probe::getVarNames()
+{
+    std::vector<std::string> varNames;
+    for( int var=0; var < int(PostProcessingVariable::LAST); var++){
+    if(this->quantities[var])
+    {
+        std::vector<std::string> names = getPostProcessingVariableNames(static_cast<PostProcessingVariable>(var));
+        varNames.insert(varNames.end(), names.begin(), names.end());
+    }}
+    return varNames;
+}
+
diff --git a/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/Probe.h b/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/Probe.h
new file mode 100644
index 0000000000000000000000000000000000000000..8223411ffc45fdac8b3ba038f97eeb899dd3d94d
--- /dev/null
+++ b/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/Probe.h
@@ -0,0 +1,94 @@
+#ifndef Probe_H
+#define Probe_H
+
+#include <cuda.h>
+
+#include "PreCollisionInteractor/PreCollisionInteractor.h"
+#include "PointerDefinitions.h"
+
+enum class PostProcessingVariable{ 
+    // HowTo add new PostProcessingVariable: Add enum here, LAST has to stay last
+    // In interpQuantities add computation of quantity in switch statement
+    // In writeGridFiles add lb->rw conversion factor
+    // In getPostProcessingVariableNames add names
+    // If new quantity depends on other quantities i.e. mean, catch in addPostProcessingVariable
+    Means,
+    Variances,
+    LAST,
+};
+
+struct ProbeStruct{
+    uint nPoints, nArrays, vals;
+    uint *pointIndicesH, *pointIndicesD;
+    real *pointCoordsX, *pointCoordsY, *pointCoordsZ;
+    real *distXH, *distYH, *distZH, *distXD, *distYD, *distZD;
+    real *quantitiesArrayH, *quantitiesArrayD;
+    bool *quantitiesH, *quantitiesD;
+    uint *arrayOffsetsH, *arrayOffsetsD;
+};
+
+__global__ void interpQuantities(   uint* pointIndices,
+                                    uint nPoints, uint n,
+                                    real* distX, real* distY, real* distZ,
+                                    real* vx, real* vy, real* vz, real* rho,            
+                                    uint* neighborX, uint* neighborY, uint* neighborZ,
+                                    bool* quantities,
+                                    uint* quantityArrayOffsets, real* quantityArray,
+                                    bool interpolate
+                                );
+
+
+class Probe : public PreCollisionInteractor 
+{
+public:
+    Probe(
+        const std::string _probeName,
+        uint _tStartAvg,
+        uint _tStartOut,
+        uint _tOut
+    ):  probeName(_probeName),
+        tStartAvg(_tStartAvg),
+        tStartOut(_tStartOut),
+        tOut(_tOut),
+        PreCollisionInteractor()
+    {
+        assert("Output starts before averaging!" && tStartOut>=tStartAvg);
+    }
+    void init(Parameter* para, GridProvider* gridProvider, CudaMemoryManager* cudaManager);
+    void interact(Parameter* para, CudaMemoryManager* cudaManager, int level, uint t);
+    void free(Parameter* para, CudaMemoryManager* cudaManager);
+
+    SPtr<ProbeStruct> getProbeStruct(int level){ return this->probeParams[level]; }
+
+    void addPostProcessingVariable(PostProcessingVariable _variable);
+
+private:
+    virtual void findPoints(Parameter* para, GridProvider* gridProvider, std::vector<int>& probeIndices_level,
+                       std::vector<real>& distX_level, std::vector<real>& distY_level, std::vector<real>& distZ_level,      
+                       std::vector<real>& pointCoordsX_level, std::vector<real>& pointCoordsY_level, std::vector<real>& pointCoordsZ_level,
+                       int level) = 0;
+    void addProbeStruct(CudaMemoryManager* cudaManager, std::vector<int>& probeIndices,
+                        std::vector<real>& distX, std::vector<real>& distY, std::vector<real>& distZ,   
+                        std::vector<real>& pointCoordsX, std::vector<real>& pointCoordsY, std::vector<real>& pointCoordsZ,
+                        int level);
+    virtual void calculateQuantities(SPtr<ProbeStruct> probeStruct, Parameter* para, int level) = 0;
+
+    void write(Parameter* para, int level, int t);
+    void writeCollectionFile(Parameter* para, int t);
+    void writeGridFiles(Parameter* para, int level, std::vector<std::string >& fnames, int t);
+    std::vector<std::string> getVarNames();
+    
+private:
+    const std::string probeName;
+
+    std::vector<SPtr<ProbeStruct>> probeParams;
+    bool quantities[int(PostProcessingVariable::LAST)];
+    std::vector<std::string> fileNamesForCollectionFile;
+    std::vector<std::string> varNames;
+
+    uint tStartAvg;
+    uint tStartOut;
+    uint tOut;
+};
+
+#endif
\ No newline at end of file
diff --git a/src/lbm/CMakeLists.txt b/src/lbm/CMakeLists.txt
index 56b03bded71b83d112d994959db0ba1d6245dc63..afa90bdd3f95bb71cf7f1eda6407f9b38766072a 100644
--- a/src/lbm/CMakeLists.txt
+++ b/src/lbm/CMakeLists.txt
@@ -7,6 +7,6 @@ if(BUILD_VF_CPU)
     vf_add_tests()
 endif()
 
-if(BUILD_VF_GPU)
+if(BUILD_VF_GPU OR BUILD_VF_GKS)
     add_subdirectory(cuda)
 endif()
diff --git a/src/lbm/constants/NumericConstants.h b/src/lbm/constants/NumericConstants.h
index 3d7c97a5b1adac97eafc36a49f9e4d968a854347..5c7556ceb4ccb711e2d2b6777e03a52e35fc1f19 100644
--- a/src/lbm/constants/NumericConstants.h
+++ b/src/lbm/constants/NumericConstants.h
@@ -118,6 +118,10 @@ static constexpr double c10eM30 = 1e-30;
 static constexpr double c10eM10 = 1e-10;
 static constexpr double smallSingle = 0.0000000002;
 
+static constexpr double cPi = 3.1415926535;
+static constexpr double cPio180 = 1.74532925199e-2;
+static constexpr double c180oPi = 57.2957795131;
+
 #else
 static constexpr float c1o2 = 0.5f;
 static constexpr float c3o2 = 1.5f;
@@ -227,6 +231,10 @@ static constexpr float c10eM30 = 1e-30f;
 static constexpr float c10eM10 = 1e-10f;
 static constexpr float smallSingle = 0.0000000002f;
 
+static constexpr float cPi = 3.1415926535f;
+static constexpr float cPio180 = 1.74532925199e-2f;
+static constexpr float c180oPi = 57.2957795131f;
+
 #endif
 
 }