diff --git a/CMakeLists.txt b/CMakeLists.txt
index ba35020d9d893637d82d968b68116ff6a922e99a..163ec4f05ee8b12d7641f3856ae4640565101851 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -199,3 +199,8 @@ if(BUILD_VF_GPU OR BUILD_VF_GKS)
     add_subdirectory(src/cuda)
     include (gpu.cmake)
 endif()
+
+if (BUILD_VF_PYTHON_BINDINGS)
+    add_subdirectory(${VF_THIRD_DIR}/pybind11/pybind11-2.6.0)
+    add_subdirectory(pythonbindings)
+endif()
diff --git a/Python/SlurmTests/poiseuille/settings.py b/Python/SlurmTests/poiseuille/settings.py
index f75c2b1d7133323880dd5520de0a96cb8fa87860..4b4a1e4e9cc7f6118a60c22a40c70b027e3ac4e2 100644
--- a/Python/SlurmTests/poiseuille/settings.py
+++ b/Python/SlurmTests/poiseuille/settings.py
@@ -1,7 +1,7 @@
 import os
 from acousticscaling import OneDirectionalAcousticScaling
-from pyfluids.kernel import LBMKernel, KernelType
-from pyfluids.parameters import RuntimeParameters, GridParameters, PhysicalParameters
+from pyfluids.cpu.kernel import LBMKernel, KernelType
+from pyfluids.cpu.parameters import RuntimeParameters, GridParameters, PhysicalParameters
 
 
 grid_params = GridParameters()
diff --git a/Python/SlurmTests/poiseuille/simulation_runner.py b/Python/SlurmTests/poiseuille/simulation_runner.py
index 0b75de40b6a8f11ccd76f97f2ed9d709dc5362dd..03fb24be7ea1a6468ae25ec3aa40ab59962ef91e 100644
--- a/Python/SlurmTests/poiseuille/simulation_runner.py
+++ b/Python/SlurmTests/poiseuille/simulation_runner.py
@@ -2,7 +2,7 @@ import os
 
 from SlurmTests.poiseuille.settings import Scaling
 from poiseuille.simulation import run_simulation
-from pyfluids.writer import Writer, OutputFormat
+from pyfluids.cpu.writer import Writer, OutputFormat
 
 
 scale_level = int(os.environ["PYFLUIDS_SCALE_LEVEL"])
diff --git a/Python/acousticscaling.py b/Python/acousticscaling.py
index 50b81db064251fa269f29bf72a561567ddedafbc..a664b8e924d648b680562b9aef11bee87b3562b1 100644
--- a/Python/acousticscaling.py
+++ b/Python/acousticscaling.py
@@ -1,5 +1,5 @@
-from pyfluids.kernel import LBMKernel
-from pyfluids.parameters import GridParameters, PhysicalParameters, RuntimeParameters
+from pyfluids.cpu.kernel import LBMKernel
+from pyfluids.cpu.parameters import GridParameters, PhysicalParameters, RuntimeParameters
 
 
 class OneDirectionalAcousticScaling:
@@ -13,10 +13,10 @@ class OneDirectionalAcousticScaling:
         self._runtime_params = runtime_parameters
         self._kernel = kernel
 
-    def configuration_for_scale_level(self, level: int = 1) -> (GridParameters,
+    def configuration_for_scale_level(self, level: int = 1) -> tuple[GridParameters,
                                                                 PhysicalParameters,
                                                                 RuntimeParameters,
-                                                                LBMKernel):
+                                                                LBMKernel]:
         if level < 0:
             raise ValueError("level must be >= 0")
 
diff --git a/Python/actuator_line/__init__.py b/Python/actuator_line/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/Python/actuator_line/actuator_line.py b/Python/actuator_line/actuator_line.py
new file mode 100644
index 0000000000000000000000000000000000000000..4a05398d52fbb770ccbbd42a7b2d387cc827ac08
--- /dev/null
+++ b/Python/actuator_line/actuator_line.py
@@ -0,0 +1,108 @@
+#%%
+import numpy as np
+from pathlib import Path
+from mpi4py import MPI
+from pyfluids import basics, gpu, logger
+#%%
+reference_diameter = 126
+
+length = np.array([30,8,8])*reference_diameter
+viscosity = 1.56e-5
+velocity = 9
+mach = 0.1
+nodes_per_diameter = 32
+
+sim_name = "ActuatorLine"
+config_file = Path(__file__).parent/Path("config.txt")
+output_path = Path(__file__).parent/Path("output")
+output_path.mkdir(exist_ok=True)
+timeStepOut = 500
+t_end = 50
+
+#%%
+logger.Logger.initialize_logger()
+basics.logger.Logger.add_stdout()
+basics.logger.Logger.set_debug_level(basics.logger.Level.INFO_LOW)
+basics.logger.Logger.time_stamp(basics.logger.TimeStamp.ENABLE)
+basics.logger.Logger.enable_printed_rank_numbers(True)
+#%%
+grid_builder = gpu.MultipleGridBuilder.make_shared()
+dx = reference_diameter/nodes_per_diameter
+
+grid_builder.add_coarse_grid(0.0, 0.0, 0.0, *length, dx)
+grid_builder.set_periodic_boundary_condition(False, False, False)
+grid_builder.build_grids(basics.LbmOrGks.LBM, False)
+# %%
+comm = gpu.Communicator.get_instance()
+#%%
+config = basics.ConfigurationFile()
+config.load(str(config_file))
+#%%
+para = gpu.Parameter(config, comm.get_number_of_process(), comm.get_pid())
+
+dt = dx * mach / (np.sqrt(3) * velocity)
+velocity_lb = velocity * dt / dx # LB units
+viscosity_lb = viscosity * dt / (dx * dx) # LB units
+
+#%%
+para.set_devices([0])
+para.set_output_prefix(sim_name)
+para.set_output_path(str(output_path))
+para.set_f_name(para.get_output_path() + "/" + para.get_output_prefix())
+para.set_print_files(True)
+para.set_max_level(1)
+#%%
+para.set_velocity(velocity_lb)
+para.set_viscosity(viscosity_lb)    
+para.set_velocity_ratio(dx/dt)
+para.set_main_kernel("CumulantK17CompChim")
+
+def init_func(coord_x, coord_y, coord_z):
+    return [0.0, velocity_lb, 0.0, 0.0]
+
+para.set_initial_condition(init_func)
+para.set_t_out(timeStepOut)
+para.set_t_end(int(t_end/dt))
+para.set_is_body_force(True)
+
+#%%
+grid_builder.set_velocity_boundary_condition(gpu.SideType.MX, velocity_lb, 0.0, 0.0)
+grid_builder.set_velocity_boundary_condition(gpu.SideType.PX, velocity_lb, 0.0, 0.0)
+
+grid_builder.set_velocity_boundary_condition(gpu.SideType.MY, velocity_lb, 0.0, 0.0)
+grid_builder.set_velocity_boundary_condition(gpu.SideType.PY, velocity_lb, 0.0, 0.0)
+
+grid_builder.set_velocity_boundary_condition(gpu.SideType.MZ, velocity_lb, 0.0, 0.0)
+grid_builder.set_velocity_boundary_condition(gpu.SideType.PZ, velocity_lb, 0.0, 0.0)
+
+#%%
+cuda_memory_manager = gpu.CudaMemoryManager.make(para)
+grid_generator = gpu.GridProvider.make_grid_generator(grid_builder, para, cuda_memory_manager)
+#%%
+turb_pos = np.array([3,3,3])*reference_diameter
+epsilon = 5
+density = 1.225
+level = 0
+n_blades = 3
+n_blade_nodes = 32
+alm = gpu.ActuatorLine(n_blades, density, n_blade_nodes, epsilon, *turb_pos, reference_diameter, level, dt, dx)
+para.add_actuator(alm)
+#%%
+point_probe = gpu.probes.PointProbe("pointProbe", str(output_path), 100, 500, 100)
+point_probe.add_probe_points_from_list(np.array([1,2,5])*reference_diameter, np.array([3,3,3])*reference_diameter, np.array([3,3,3])*reference_diameter)
+point_probe.add_post_processing_variable(gpu.probes.PostProcessingVariable.Means)
+
+para.add_probe(point_probe)
+
+plane_probe = gpu.probes.PlaneProbe("planeProbe", str(output_path), 100, 500, 100)
+plane_probe.set_probe_plane(5*reference_diameter, 0, 0, dx, length[1], length[2])
+para.add_probe(plane_probe)
+#%%
+sim = gpu.Simulation(comm)
+kernel_factory = gpu.KernelFactory.get_instance()
+sim.set_factories(kernel_factory, gpu.PreProcessorFactory.get_instance())
+sim.init(para, grid_generator, gpu.FileWriter(), cuda_memory_manager)
+#%%
+sim.run()
+sim.free()
+MPI.Finalize()
\ No newline at end of file
diff --git a/Python/actuator_line/config.txt b/Python/actuator_line/config.txt
new file mode 100644
index 0000000000000000000000000000000000000000..e4c778c4cc048f54c0a32310e6bf4a7343a263fa
--- /dev/null
+++ b/Python/actuator_line/config.txt
@@ -0,0 +1,2 @@
+Path = .
+GridPath = .
diff --git a/Python/cubeflow/simulation.py b/Python/cubeflow/simulation.py
index c8c145fdc27b5eab6a6e2df95d94f27999b6e258..9e77e8d747c072188d8d81150afa8e2ccb76a792 100644
--- a/Python/cubeflow/simulation.py
+++ b/Python/cubeflow/simulation.py
@@ -1,9 +1,9 @@
-from pyfluids import Simulation
-from pyfluids.boundaryconditions import NoSlipBoundaryCondition, VelocityBoundaryCondition, DensityBoundaryCondition
-from pyfluids.geometry import GbCuboid3D
-from pyfluids.kernel import LBMKernel, KernelType
-from pyfluids.parameters import PhysicalParameters, RuntimeParameters, GridParameters
-from pyfluids.writer import Writer, OutputFormat
+from pyfluids.cpu import Simulation
+from pyfluids.cpu.boundaryconditions import NoSlipBoundaryCondition, VelocityBoundaryCondition, DensityBoundaryCondition
+from pyfluids.cpu.geometry import GbCuboid3D
+from pyfluids.cpu.kernel import LBMKernel, KernelType
+from pyfluids.cpu.parameters import PhysicalParameters, RuntimeParameters, GridParameters
+from pyfluids.cpu.writer import Writer, OutputFormat
 from pymuparser import Parser
 
 import os
diff --git a/Python/liddrivencavity/simulation.py b/Python/liddrivencavity/simulation.py
index f5e5921a3a163c8f5d2421c833ccc993eb5430c0..155fad2f6f8aade0368c8a7006b88f7985f8822c 100644
--- a/Python/liddrivencavity/simulation.py
+++ b/Python/liddrivencavity/simulation.py
@@ -1,9 +1,9 @@
-from pyfluids import Simulation
-from pyfluids.boundaryconditions import NoSlipBoundaryCondition, VelocityBoundaryCondition
-from pyfluids.geometry import GbCuboid3D
-from pyfluids.kernel import LBMKernel, KernelType
-from pyfluids.parameters import GridParameters, PhysicalParameters, RuntimeParameters
-from pyfluids.writer import Writer, OutputFormat
+from pyfluids.cpu import Simulation
+from pyfluids.cpu.boundaryconditions import NoSlipBoundaryCondition, VelocityBoundaryCondition
+from pyfluids.cpu.geometry import GbCuboid3D
+from pyfluids.cpu.kernel import LBMKernel, KernelType
+from pyfluids.cpu.parameters import GridParameters, PhysicalParameters, RuntimeParameters
+from pyfluids.cpu.writer import Writer, OutputFormat
 from pymuparser import Parser
 
 runtime_params = RuntimeParameters()
diff --git a/Python/poiseuille/poiseuille_hpc.py b/Python/poiseuille/poiseuille_hpc.py
index b9bf67531f3b760bacc1912dc39b57d8b594253c..f5f5a1387c9fe234abae0c6f979cc7d5b283d1a4 100644
--- a/Python/poiseuille/poiseuille_hpc.py
+++ b/Python/poiseuille/poiseuille_hpc.py
@@ -1,5 +1,5 @@
 from poiseuille.simulation import run_simulation
-from pyfluids.parameters import *
+from pyfluids.cpu.parameters import *
 
 grid_parameters = GridParameters()
 grid_parameters.number_of_nodes_per_direction = [64, 64, 512]
diff --git a/Python/poiseuille/simulation.py b/Python/poiseuille/simulation.py
index 31ceb1ab9ef90fa4fd606bde4f47c45b8f7d7567..d107801fa84cfe16d1d7e91d31dc3ff4b8671f02 100644
--- a/Python/poiseuille/simulation.py
+++ b/Python/poiseuille/simulation.py
@@ -1,9 +1,9 @@
-from pyfluids import Simulation
-from pyfluids.boundaryconditions import NoSlipBoundaryCondition
-from pyfluids.geometry import GbCuboid3D, State
-from pyfluids.kernel import LBMKernel, KernelType
-from pyfluids.parameters import RuntimeParameters, GridParameters, PhysicalParameters
-from pyfluids.writer import Writer, OutputFormat
+from pyfluids.cpu import Simulation
+from pyfluids.cpu.boundaryconditions import NoSlipBoundaryCondition
+from pyfluids.cpu.geometry import GbCuboid3D, State
+from pyfluids.cpu.kernel import LBMKernel, KernelType
+from pyfluids.cpu.parameters import RuntimeParameters, GridParameters, PhysicalParameters
+from pyfluids.cpu.writer import Writer, OutputFormat
 
 default_grid_params = GridParameters()
 default_grid_params.node_distance = 10 / 32
diff --git a/Python/poiseuille/test_poiseuille_l2.py b/Python/poiseuille/test_poiseuille_l2.py
index 39c8b6dffe05e3c352e7fd340857e43d8d5a3dc8..93aa2600d5260dea7e72f3aa98db7334fe5285c6 100644
--- a/Python/poiseuille/test_poiseuille_l2.py
+++ b/Python/poiseuille/test_poiseuille_l2.py
@@ -5,8 +5,8 @@ import unittest
 import matplotlib.pyplot as plt
 import numpy as np
 import pyvista as pv
-from pyfluids.kernel import LBMKernel, KernelType
-from pyfluids.parameters import GridParameters, PhysicalParameters, RuntimeParameters
+from pyfluids.cpu.kernel import LBMKernel, KernelType
+from pyfluids.cpu.parameters import GridParameters, PhysicalParameters, RuntimeParameters
 from scipy import stats
 
 from errors import normalized_l2_error
diff --git a/Python/tests/test_acousticscaling.py b/Python/tests/test_acousticscaling.py
index 2da5314529f9559f9ac316f2d1bb3f1a9d0e1211..6413123a80db8c5882fcf1dbe6f72a1f5438736c 100644
--- a/Python/tests/test_acousticscaling.py
+++ b/Python/tests/test_acousticscaling.py
@@ -1,8 +1,8 @@
 import unittest
 from typing import List
 
-from pyfluids.kernel import LBMKernel, KernelType
-from pyfluids.parameters import GridParameters, PhysicalParameters, RuntimeParameters
+from pyfluids.cpu.kernel import LBMKernel, KernelType
+from pyfluids.cpu.parameters import GridParameters, PhysicalParameters, RuntimeParameters
 
 from acousticscaling import OneDirectionalAcousticScaling
 
diff --git a/Python/tests/test_boundaryconditions.py b/Python/tests/test_boundaryconditions.py
index 5a7d61f36337398fc5621540951f15b72262b17b..e004ddfa21c78ea3d63a89f5dbc3bd7438a18ff1 100644
--- a/Python/tests/test_boundaryconditions.py
+++ b/Python/tests/test_boundaryconditions.py
@@ -1,5 +1,5 @@
 import unittest
-from pyfluids.boundaryconditions import *
+from pyfluids.cpu.boundaryconditions import *
 
 
 class BoundaryConditionsTest(unittest.TestCase):
diff --git a/Python/tests/test_geometry.py b/Python/tests/test_geometry.py
index 5e953f58b4b24393ae3a8f2994184c9c7f27eca3..5bb89eb245b6055653b78fde381da050d402b0cc 100644
--- a/Python/tests/test_geometry.py
+++ b/Python/tests/test_geometry.py
@@ -1,6 +1,6 @@
 import unittest
 
-from pyfluids.geometry import *
+from pyfluids.cpu.geometry import *
 
 
 class TestGeometry(unittest.TestCase):
diff --git a/Python/tests/test_kernel.py b/Python/tests/test_kernel.py
index b1016f15308a77c9025787e061355819cbca3874..8f58a1c869f9e292856268d43245a75f1dcfe213 100644
--- a/Python/tests/test_kernel.py
+++ b/Python/tests/test_kernel.py
@@ -1,6 +1,6 @@
 import unittest
 
-from pyfluids.kernel import LBMKernel, KernelType
+from pyfluids.cpu.kernel import LBMKernel, KernelType
 
 
 class TestLBMKernel(unittest.TestCase):
diff --git a/apps/gpu/LBM/ActuatorLine/ActuatorLine.cpp b/apps/gpu/LBM/ActuatorLine/ActuatorLine.cpp
index be0ec6d5717e094dc9cb5ad478315813b9ee68e6..ff36a632a47c6dc0033f5b3e30b36cb98ed33c40 100644
--- a/apps/gpu/LBM/ActuatorLine/ActuatorLine.cpp
+++ b/apps/gpu/LBM/ActuatorLine/ActuatorLine.cpp
@@ -144,11 +144,11 @@ void multipleLevel(const std::string& configPath)
 
     para->setMaxLevel(1);
 
+
     para->setVelocity(velocityLB);
     para->setViscosity(viscosityLB);
-
     para->setVelocityRatio( dx / dt );
-
+    para->setViscosityRatio( dx*dx/dt );
     para->setMainKernel("CumulantK17CompChim");
 
     para->setInitialCondition([&](real coordX, real coordY, real coordZ, real &rho, real &vx, real &vy, real &vz) {
@@ -185,11 +185,10 @@ void multipleLevel(const std::string& configPath)
     uint nBlades = 3;
     uint nBladeNodes = 32;
 
-
     SPtr<ActuatorLine> actuator_line =SPtr<ActuatorLine>( new ActuatorLine(nBlades, density, nBladeNodes, epsilon, turbPos[0], turbPos[1], turbPos[2], reference_diameter, level, dt, dx) );
     para->addActuator( actuator_line );
 
-    SPtr<PointProbe> pointProbe = SPtr<PointProbe>( new PointProbe("pointProbe", 100, 500, 100) );
+    SPtr<PointProbe> pointProbe = SPtr<PointProbe>( new PointProbe("pointProbe", para->getOutputPath(), 100, 500, 100) );
     std::vector<real> probeCoordsX = {reference_diameter,2*reference_diameter,5*reference_diameter};
     std::vector<real> probeCoordsY = {3*reference_diameter,3*reference_diameter,3*reference_diameter};
     std::vector<real> probeCoordsZ = {3*reference_diameter,3*reference_diameter,3*reference_diameter};
@@ -199,7 +198,7 @@ void multipleLevel(const std::string& configPath)
     pointProbe->addPostProcessingVariable(PostProcessingVariable::Variances);
     para->addProbe( pointProbe );
 
-    SPtr<PlaneProbe> planeProbe = SPtr<PlaneProbe>( new PlaneProbe("planeProbe", 100, 500, 100) );
+    SPtr<PlaneProbe> planeProbe = SPtr<PlaneProbe>( new PlaneProbe("planeProbe", para->getOutputPath(), 100, 500, 100) );
     planeProbe->setProbePlane(5*reference_diameter, 0, 0, dx, L_y, L_z);
     planeProbe->addPostProcessingVariable(PostProcessingVariable::Means);
     para->addProbe( planeProbe );
diff --git a/cpu.cmake b/cpu.cmake
index bad575aa3854fb2be777e90bba9ea71fb2cbba5a..a6220ec1ffb9641b824ee26b8be8497ea340173f 100644
--- a/cpu.cmake
+++ b/cpu.cmake
@@ -80,9 +80,7 @@ add_subdirectory(${VF_THIRD_DIR}/MuParser)
 add_subdirectory(src/cpu/VirtualFluidsCore)
 
 if(BUILD_VF_PYTHON_BINDINGS)
-    add_subdirectory(${VF_THIRD_DIR}/pybind11/pybind11-2.6.0)
     add_subdirectory(src/cpu/simulationconfig)
-    add_subdirectory(src/cpu/pythonbindings)
 endif()
 
 set (APPS_ROOT_CPU "${VF_ROOT_DIR}/apps/cpu/")
diff --git a/gpu.cmake b/gpu.cmake
index ead5e26bca299819513e4f7639d5431d8973c927..10482f78592aefe6d8b1aae4d30dc6088c9bca88 100644
--- a/gpu.cmake
+++ b/gpu.cmake
@@ -37,8 +37,8 @@ IF (BUILD_VF_GPU)
     #add_subdirectory(targets/apps/LBM/BaselNU)
     #add_subdirectory(targets/apps/LBM/BaselMultiGPU)
 
-    add_subdirectory(apps/gpu/LBM/DrivenCavity)
-    add_subdirectory(apps/gpu/LBM/WTG_RUB)
+    #add_subdirectory(apps/gpu/LBM/DrivenCavity)
+    #add_subdirectory(apps/gpu/LBM/WTG_RUB)
     #add_subdirectory(apps/gpu/LBM/gridGeneratorTest)
     #add_subdirectory(apps/gpu/LBM/TGV_3D)
     #add_subdirectory(apps/gpu/LBM/TGV_3D_MultiGPU)
@@ -136,4 +136,4 @@ endif()
 if(BUILD_VF_TRAFFIC)
     add_subdirectory(src/gpu/Traffic)
     add_subdirectory(apps/gpu/LBM/TrafficTest)
-endif()
+endif()
\ No newline at end of file
diff --git a/pythonbindings/CMakeLists.txt b/pythonbindings/CMakeLists.txt
new file mode 100644
index 0000000000000000000000000000000000000000..5a84adef027fdfa2953e016693bb64570e48c1ef
--- /dev/null
+++ b/pythonbindings/CMakeLists.txt
@@ -0,0 +1,24 @@
+project(VirtualFluidsPython LANGUAGES CUDA CXX)
+IF(BUILD_VF_GPU)
+    pybind11_add_module(pyfluids src/VirtualFluidsModulesGPU.cpp)
+    set_source_files_properties(src/VirtualFluidsModulesGPU.cpp PROPERTIES LANGUAGE CUDA)
+
+    target_link_libraries(pyfluids PRIVATE GridGenerator VirtualFluids_GPU basics lbmCuda logger)
+    target_include_directories(pyfluids PRIVATE ${VF_THIRD_DIR}/cuda_samples/)
+
+ENDIF()
+IF(BUILD_VF_CPU)
+    pybind11_add_module(pyfluids src/VirtualFluidsModulesCPU.cpp)
+    pybind11_add_module(pymuparser src/muParser.cpp)
+
+    # TODO: Move this to MuParser CMakeLists.txt
+    set_target_properties(muparser PROPERTIES POSITION_INDEPENDENT_CODE ON)
+
+    target_compile_definitions(pyfluids PRIVATE VF_METIS VF_MPI)
+    target_compile_definitions(pymuparser PRIVATE VF_METIS VF_MPI)
+
+    target_link_libraries(pyfluids PRIVATE simulationconfig VirtualFluidsCore muparser basics)
+    target_link_libraries(pymuparser PRIVATE muparser)
+ENDIF()
+target_include_directories(pyfluids PRIVATE ${CMAKE_SOURCE_DIR}/src/)
+target_include_directories(pyfluids PRIVATE ${CMAKE_BINARY_DIR})
\ No newline at end of file
diff --git a/pythonbindings/src/VirtualFluidsModulesCPU.cpp b/pythonbindings/src/VirtualFluidsModulesCPU.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..2fba3da494f568f7d0d0a117a579a45c9c1b9245
--- /dev/null
+++ b/pythonbindings/src/VirtualFluidsModulesCPU.cpp
@@ -0,0 +1,12 @@
+#include <pybind11/pybind11.h>
+#include "cpu/cpu.cpp"
+
+namespace py_bindings
+{
+    namespace py = pybind11;
+
+    PYBIND11_MODULE(pyfluids, m)
+    {
+        cpu::makeModule(m);
+    }
+}
\ No newline at end of file
diff --git a/pythonbindings/src/VirtualFluidsModulesGPU.cpp b/pythonbindings/src/VirtualFluidsModulesGPU.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..b96971caf381faada76ee676cf60469492d055c2
--- /dev/null
+++ b/pythonbindings/src/VirtualFluidsModulesGPU.cpp
@@ -0,0 +1,19 @@
+#include <pybind11/pybind11.h>
+#include "basics/basics.cpp"
+#include "lbm/lbm.cpp"
+#include "gpu/gpu.cpp"
+#include "logger/logger.cpp"
+
+namespace py_bindings
+{
+    namespace py = pybind11;
+
+    PYBIND11_MODULE(pyfluids, m)
+    {
+        basics::makeModule(m);
+        gpu::makeModule(m);
+        lbm::makeModule(m);
+        logging::makeModule(m);
+        py::add_ostream_redirect(m, "ostream_redirect");
+    }
+}
\ No newline at end of file
diff --git a/pythonbindings/src/basics/basics.cpp b/pythonbindings/src/basics/basics.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..381e345d78226b25ec3a77a14340d2ef1171c8c9
--- /dev/null
+++ b/pythonbindings/src/basics/basics.cpp
@@ -0,0 +1,20 @@
+#include <pybind11/pybind11.h>
+#include "submodules/logger.cpp"
+#include "submodules/configuration_file.cpp"
+#include "submodules/lbm_or_gks.cpp"
+
+namespace basics
+{
+    namespace py = pybind11;
+
+    py::module makeModule(py::module_ &parentModule)
+    {
+        py::module basicsModule = parentModule.def_submodule("basics");
+
+        logger::makeModule(basicsModule);
+        configuration::makeModule(basicsModule);
+        lbmOrGks::makeModule(basicsModule);
+        
+        return basicsModule;
+    }
+}
\ No newline at end of file
diff --git a/pythonbindings/src/basics/submodules/configuration_file.cpp b/pythonbindings/src/basics/submodules/configuration_file.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..f5a2f87135a17f5eda34a7467d95f9db6b1c21d1
--- /dev/null
+++ b/pythonbindings/src/basics/submodules/configuration_file.cpp
@@ -0,0 +1,14 @@
+#include <pybind11/pybind11.h>
+#include <basics/config/ConfigurationFile.h>
+
+namespace configuration
+{
+    namespace py = pybind11;
+
+    void makeModule(py::module_ &parentModule)
+    {
+        py::class_<vf::basics::ConfigurationFile>(parentModule, "ConfigurationFile")
+        .def(py::init<>())
+        .def("load", &vf::basics::ConfigurationFile::load);
+    }
+}
\ No newline at end of file
diff --git a/pythonbindings/src/basics/submodules/lbm_or_gks.cpp b/pythonbindings/src/basics/submodules/lbm_or_gks.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..ed1deeca62fc57b7f44499b306e9f99b7f990604
--- /dev/null
+++ b/pythonbindings/src/basics/submodules/lbm_or_gks.cpp
@@ -0,0 +1,14 @@
+#include <pybind11/pybind11.h>
+#include "basics/Core/LbmOrGks.h"
+
+namespace lbmOrGks
+{
+    namespace py = pybind11;
+
+    void makeModule(py::module_ &parentModule)
+    {
+         py::enum_<LbmOrGks>(parentModule, "LbmOrGks")
+         .value("LBM", LbmOrGks::LBM)
+         .value("GKS", LbmOrGks::GKS);
+    }
+}
\ No newline at end of file
diff --git a/pythonbindings/src/basics/submodules/logger.cpp b/pythonbindings/src/basics/submodules/logger.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..d46648e349b44243581e083f3561e8a13648f3b2
--- /dev/null
+++ b/pythonbindings/src/basics/submodules/logger.cpp
@@ -0,0 +1,36 @@
+#include <pybind11/pybind11.h>
+#include <pybind11/iostream.h>
+#include <basics/Core/Logger/Logger.h>
+#include <basics/Core/Logger/implementations/LoggerImp.h>
+
+namespace logger
+{
+    namespace py = pybind11;
+
+    py::module makeModule(py::module_ &parentModule)
+    {
+        py::module loggerModule = parentModule.def_submodule("logger");
+
+        py::class_<logging::Logger>(loggerModule, "Logger")
+        .def("add_stdout", [](){
+            logging::Logger::addStream(&std::cout);
+        })
+        .def("set_debug_level", &logging::Logger::setDebugLevel)
+        .def("time_stamp", &logging::Logger::timeStamp)
+        .def("enable_printed_rank_numbers", &logging::Logger::enablePrintedRankNumbers);
+
+        loggerModule.attr("log") = logging::out;
+        py::enum_<logging::Logger::Level>(loggerModule, "Level")
+        .value("INFO_LOW", logging::Logger::Level::INFO_LOW)
+        .value("INFO_INTERMEDIATE", logging::Logger::Level::INFO_INTERMEDIATE)
+        .value("INFO_HIGH", logging::Logger::Level::INFO_HIGH)
+        .value("WARNING", logging::Logger::Level::WARNING)
+        .value("LOGGER_ERROR", logging::Logger::Level::LOGGER_ERROR);
+
+        py::enum_<logging::Logger::TimeStamp>(loggerModule, "TimeStamp")
+        .value("ENABLE", logging::Logger::TimeStamp::ENABLE)
+        .value("DISABLE", logging::Logger::TimeStamp::DISABLE);
+
+        return loggerModule;
+    }
+}
\ No newline at end of file
diff --git a/pythonbindings/src/cpu/cpu.cpp b/pythonbindings/src/cpu/cpu.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..554de53b47446366693aed31d534f6145ebea8ba
--- /dev/null
+++ b/pythonbindings/src/cpu/cpu.cpp
@@ -0,0 +1,23 @@
+#include <pybind11/pybind11.h>
+#include "submodules/boundaryconditions.cpp"
+#include "submodules/simulationconfig.cpp"
+#include "submodules/geometry.cpp"
+#include "submodules/kernel.cpp"
+#include "submodules/simulationparameters.cpp"
+#include "submodules/writer.cpp"
+
+namespace cpu
+{
+    namespace py = pybind11;
+    py::module makeModule(py::module_ &parentModule)
+    {
+        py::module cpuModule = parentModule.def_submodule("cpu");
+        boundaryconditions::makeModule(cpuModule);
+        simulation::makeModule(cpuModule);
+        geometry::makeModule(cpuModule);
+        kernel::makeModule(cpuModule);
+        parameters::makeModule(cpuModule);
+        writer::makeModule(cpuModule);
+        return cpuModule;
+    }
+}
\ No newline at end of file
diff --git a/pythonbindings/src/cpu/submodules/boundaryconditions.cpp b/pythonbindings/src/cpu/submodules/boundaryconditions.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..3bff7bc069ca20fe1c0cf3d1847b9714e0381505
--- /dev/null
+++ b/pythonbindings/src/cpu/submodules/boundaryconditions.cpp
@@ -0,0 +1,64 @@
+#include <pybind11/pybind11.h>
+#include <pybind11/stl.h>
+#include <BoundaryConditions/DensityBCAdapter.h>
+#include <BoundaryConditions/NonReflectingOutflowBCAlgorithm.h>
+#include <BoundaryConditions/BCAdapter.h>
+#include <BoundaryConditions/NoSlipBCAdapter.h>
+#include <BoundaryConditions/VelocityBCAdapter.h>
+#include <BoundaryConditions/NoSlipBCAlgorithm.h>
+#include <BoundaryConditions/VelocityBCAlgorithm.h>
+#include <BoundaryConditions/HighViscosityNoSlipBCAlgorithm.h>
+
+namespace boundaryconditions
+{
+    namespace py = pybind11;
+    using namespace py::literals;
+
+    template<class adapter, class algorithm,
+            class = std::enable_if_t<std::is_base_of<BCAdapter, adapter>::value>,
+            class = std::enable_if_t<std::is_base_of<BCAlgorithm, algorithm>::value>>
+    class PyBoundaryCondition : public adapter
+    {
+    public:
+        template<typename ...Args>
+        PyBoundaryCondition(Args &&... args) : adapter(std::forward<Args>(args)...)
+        {
+            this->setBcAlgorithm(std::make_shared<algorithm>());
+        }
+    };
+
+    template<class adapter, class algorithm>
+    using bc_class = py::class_<PyBoundaryCondition<adapter, algorithm>, BCAdapter,
+            std::shared_ptr<PyBoundaryCondition<adapter, algorithm>>>;
+
+    void makeModule(py::module_ &parentModule)
+    {
+        py::module_ bcModule = parentModule.def_submodule("boundaryconditions");
+
+        auto _ = py::class_<BCAdapter, std::shared_ptr<BCAdapter>>(bcModule, "BCAdapter");
+
+        bc_class<NoSlipBCAdapter, NoSlipBCAlgorithm>(bcModule, "NoSlipBoundaryCondition")
+                .def(py::init());
+
+        bc_class<NoSlipBCAdapter, HighViscosityNoSlipBCAlgorithm>(bcModule, "HighViscosityNoSlipBoundaryCondition")
+                .def(py::init());
+
+        bc_class<VelocityBCAdapter, VelocityBCAlgorithm>(bcModule, "VelocityBoundaryCondition")
+                .def(py::init())
+                .def(py::init<bool &, bool &, bool &, mu::Parser &, double &, double &>(),
+                     "vx1"_a, "vx2"_a, "vx3"_a,
+                     "function"_a, "start_time"_a, "end_time"_a)
+                .def(py::init<bool &, bool &, bool &, mu::Parser &, mu::Parser &, mu::Parser &, double &, double &>(),
+                     "vx1"_a, "vx2"_a, "vx3"_a,
+                     "function_vx1"_a, "function_vx2"_a, "function_vx2"_a,
+                     "start_time"_a, "end_time"_a)
+                .def(py::init<double &, double &, double &, double &, double &, double &, double &, double &, double &>(),
+                     "vx1"_a, "vx1_start_time"_a, "vx1_end_time"_a,
+                     "vx2"_a, "vx2_start_time"_a, "vx2_end_time"_a,
+                     "vx3"_a, "vx3_start_time"_a, "vx3_end_time"_a);
+
+        bc_class<DensityBCAdapter, NonReflectingOutflowBCAlgorithm>(bcModule, "NonReflectingOutflow")
+                .def(py::init());
+    }
+
+}
\ No newline at end of file
diff --git a/pythonbindings/src/cpu/submodules/geometry.cpp b/pythonbindings/src/cpu/submodules/geometry.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..b7ff4dd761258d41687589d2dd89c3479093753e
--- /dev/null
+++ b/pythonbindings/src/cpu/submodules/geometry.cpp
@@ -0,0 +1,84 @@
+#include <pybind11/pybind11.h>
+#include <geometry3d/GbPoint3D.h>
+#include <geometry3d/GbObject3D.h>
+#include <geometry3d/GbCuboid3D.h>
+#include <geometry3d/GbLine3D.h>
+#include <Interactors/Interactor3D.h>
+
+
+namespace geometry
+{
+    namespace py = pybind11;
+
+    template<class GeoObject>
+    using py_geometry = py::class_<GeoObject, GbObject3D, std::shared_ptr<GeoObject>>;
+
+    std::string GbPoint3D_repr_(const GbPoint3D &instance)
+    {
+        std::ostringstream stream;
+        stream << "<GbPoint3D"
+               << " x1: " << instance.getX1Coordinate()
+               << " x2: " << instance.getX2Coordinate()
+               << " x3: " << instance.getX3Coordinate() << ">";
+
+        return stream.str();
+    }
+
+    void makeModule(py::module_ &parentModule)
+    {
+        py::module geometry = parentModule.def_submodule("geometry");
+
+        py::class_<GbObject3D, std::shared_ptr<GbObject3D>>(geometry, "GbObject3D");
+
+        py_geometry<GbPoint3D>(geometry, "GbPoint3D")
+                .def(py::init())
+                .def(py::init<double &, double &, double &>())
+                .def(py::init<GbPoint3D *>())
+                .def_property("x1", &GbPoint3D::getX1Coordinate, &GbPoint3D::setX1)
+                .def_property("x2", &GbPoint3D::getX2Coordinate, &GbPoint3D::setX2)
+                .def_property("x3", &GbPoint3D::getX3Coordinate, &GbPoint3D::setX3)
+                .def("get_distance", &GbPoint3D::getDistance)
+                .def("__repr__", &GbPoint3D_repr_);
+
+        py_geometry<GbCuboid3D>(geometry, "GbCuboid3D")
+                .def(py::init())
+                .def(py::init<double &, double &, double &, double &, double &, double &>())
+                .def(py::init<GbPoint3D *, GbPoint3D *>())
+                .def(py::init<GbCuboid3D *>())
+                .def_property("point1", &GbCuboid3D::getPoint1, &GbCuboid3D::setPoint1)
+                .def_property("point2", &GbCuboid3D::getPoint2, &GbCuboid3D::setPoint2)
+                .def("__repr__", [&](GbCuboid3D &instance)
+                {
+                    std::ostringstream stream;
+                    stream << "<GbCuboid3D" << std::endl
+                           << "point1: " << GbPoint3D_repr_(instance.getPoint1()) << std::endl
+                           << "point2: " << GbPoint3D_repr_(instance.getPoint2()) << ">";
+                    return stream.str();
+                });
+
+        py_geometry<GbLine3D>(geometry, "GbLine3D")
+                .def(py::init())
+                .def(py::init<GbPoint3D *, GbPoint3D *>())
+                .def(py::init<GbLine3D>())
+                .def_property("point1", &GbLine3D::getPoint1, &GbLine3D::setPoint1)
+                .def_property("point2", &GbLine3D::getPoint2, &GbLine3D::setPoint2)
+                .def("__repr__", [&](GbLine3D &instance)
+                {
+                    std::ostringstream stream;
+                    stream << "<GbLine3D" << std::endl
+                           << "point1: " << GbPoint3D_repr_(instance.getPoint1()) << std::endl
+                           << "point2: " << GbPoint3D_repr_(instance.getPoint2()) << ">";
+                    return stream.str();
+                });
+
+
+        py::class_<Interactor3D, std::shared_ptr<Interactor3D>>(geometry, "State")
+                .def_readonly_static("SOLID", &Interactor3D::SOLID)
+                .def_readonly_static("INVERSESOLID", &Interactor3D::INVERSESOLID)
+                .def_readonly_static("TIMEDEPENDENT", &Interactor3D::TIMEDEPENDENT)
+                .def_readonly_static("FLUID", &Interactor3D::FLUID)
+                .def_readonly_static("MOVEABLE", &Interactor3D::MOVEABLE)
+                .def_readonly_static("CHANGENOTNECESSARY", &Interactor3D::CHANGENOTNECESSARY);
+    }
+
+}
\ No newline at end of file
diff --git a/pythonbindings/src/cpu/submodules/kernel.cpp b/pythonbindings/src/cpu/submodules/kernel.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..fb291790632cc2041410f60a14fca8d966283343
--- /dev/null
+++ b/pythonbindings/src/cpu/submodules/kernel.cpp
@@ -0,0 +1,45 @@
+#include <memory>
+#include <pybind11/pybind11.h>
+#include <simulationconfig/KernelFactory.h>
+#include <simulationconfig/KernelConfigStructs.h>
+
+namespace kernel
+{
+    namespace py = pybind11;
+
+    void makeModule(py::module_ &parentModule)
+    {
+        py::module kernelModule = parentModule.def_submodule("kernel");
+
+        py::enum_<KernelFactory::KernelType>(kernelModule, "KernelType")
+                .value("BGK", KernelFactory::BGK)
+                .value("CompressibleCumulantFourthOrderViscosity",
+                       KernelFactory::COMPRESSIBLE_CUMULANT_4TH_ORDER_VISCOSITY);
+
+        py::class_<LBMKernelConfiguration, std::shared_ptr<LBMKernelConfiguration>>(kernelModule, "LBMKernel")
+                .def(py::init<KernelFactory::KernelType>())
+                .def_readwrite("type", &LBMKernelConfiguration::kernelType)
+                .def_readwrite("use_forcing", &LBMKernelConfiguration::useForcing)
+                .def_readwrite("forcing_in_x1", &LBMKernelConfiguration::forcingX1)
+                .def_readwrite("forcing_in_x2", &LBMKernelConfiguration::forcingX2)
+                .def_readwrite("forcing_in_x3", &LBMKernelConfiguration::forcingX3)
+                .def("set_forcing", [](LBMKernelConfiguration &kernelConfig, double x1, double x2, double x3)
+                {
+                    kernelConfig.forcingX1 = x1;
+                    kernelConfig.forcingX2 = x2;
+                    kernelConfig.forcingX3 = x3;
+                })
+                .def("__repr__", [](LBMKernelConfiguration &kernelConfig)
+                {
+                    std::ostringstream stream;
+                    stream << "<" << kernelConfig.kernelType << std::endl
+                           << "Use forcing: " << kernelConfig.useForcing << std::endl
+                           << "Forcing in x1: " << kernelConfig.forcingX1 << std::endl
+                           << "Forcing in x2: " << kernelConfig.forcingX2 << std::endl
+                           << "Forcing in x3: " << kernelConfig.forcingX3 << ">" << std::endl;
+
+                    return stream.str();
+                });
+    }
+
+}
\ No newline at end of file
diff --git a/pythonbindings/src/cpu/submodules/simulationconfig.cpp b/pythonbindings/src/cpu/submodules/simulationconfig.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..60af4e36af4dca67e9262dd9f5ee1f46d5b7bb58
--- /dev/null
+++ b/pythonbindings/src/cpu/submodules/simulationconfig.cpp
@@ -0,0 +1,22 @@
+#include <pybind11/pybind11.h>
+#include <simulationconfig/Simulation.h>
+
+namespace simulation
+{
+    namespace py = pybind11;
+
+    void makeModule(py::module_ &parentModule)
+    {
+        py::class_<Simulation, std::shared_ptr<Simulation>>(parentModule, "Simulation")
+                .def(py::init())
+                .def("set_writer", &Simulation::setWriterConfiguration)
+                .def("set_grid_parameters", &Simulation::setGridParameters)
+                .def("set_physical_parameters", &Simulation::setPhysicalParameters)
+                .def("set_runtime_parameters", &Simulation::setRuntimeParameters)
+                .def("set_kernel_config", &Simulation::setKernelConfiguration)
+                .def("add_object", &Simulation::addObject)
+                .def("add_bc_adapter", &Simulation::addBCAdapter)
+                .def("run_simulation", &Simulation::run);
+    }
+
+}
\ No newline at end of file
diff --git a/pythonbindings/src/cpu/submodules/simulationparameters.cpp b/pythonbindings/src/cpu/submodules/simulationparameters.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..acc272f2ee412cfbafd9007b4b18610cfd0a1e9b
--- /dev/null
+++ b/pythonbindings/src/cpu/submodules/simulationparameters.cpp
@@ -0,0 +1,59 @@
+#include <pybind11/pybind11.h>
+#include <pybind11/stl.h>
+#include <complex>
+#include <simulationconfig/SimulationParameters.h>
+
+namespace parameters
+{
+    namespace py = pybind11;
+
+    void makeModule(py::module_ &parentModule)
+    {
+        py::module parametersModule = parentModule.def_submodule("parameters");
+
+        py::class_<PhysicalParameters, std::shared_ptr<PhysicalParameters>>(parametersModule, "PhysicalParameters")
+                .def(py::init())
+                .def_readwrite("bulk_viscosity_factor", &PhysicalParameters::bulkViscosityFactor,
+                               "The viscosity of the fluid will be multiplied with this factor to calculate its bulk viscosity. Default is 1.0")
+                .def_readwrite("lattice_viscosity", &PhysicalParameters::latticeViscosity, "Lattice viscosity");
+
+        py::class_<GridParameters, std::shared_ptr<GridParameters>>(parametersModule, "GridParameters")
+                .def(py::init())
+                .def_readwrite("node_distance", &GridParameters::nodeDistance)
+                .def_readwrite("reference_direction_index", &GridParameters::referenceDirectionIndex)
+                .def_readwrite("number_of_nodes_per_direction", &GridParameters::numberOfNodesPerDirection)
+                .def_readwrite("blocks_per_direction", &GridParameters::blocksPerDirection)
+                .def_readwrite("periodic_boundary_in_x1", &GridParameters::periodicBoundaryInX1)
+                .def_readwrite("periodic_boundary_in_x2", &GridParameters::periodicBoundaryInX2)
+                .def_readwrite("periodic_boundary_in_x3", &GridParameters::periodicBoundaryInX3)
+                .def_property_readonly("bounding_box", &GridParameters::boundingBox);
+
+        py::class_<BoundingBox, std::shared_ptr<BoundingBox>>(parametersModule, "BoundingBox")
+                .def_readonly("min_x1", &BoundingBox::minX1)
+                .def_readonly("min_x2", &BoundingBox::minX2)
+                .def_readonly("min_x3", &BoundingBox::minX3)
+                .def_readonly("max_x1", &BoundingBox::maxX1)
+                .def_readonly("max_x2", &BoundingBox::maxX2)
+                .def_readonly("max_x3", &BoundingBox::maxX3)
+                .def("__repr__", [](BoundingBox &self)
+                {
+                    std::ostringstream stream;
+                    stream << "<BoundingBox" << std::endl
+                           << "min x1: " << self.minX1 << std::endl
+                           << "min x2: " << self.minX2 << std::endl
+                           << "min x3: " << self.minX3 << std::endl
+                           << "max x1: " << self.maxX1 << std::endl
+                           << "max x2: " << self.maxX2 << std::endl
+                           << "max x3: " << self.maxX3 << std::endl << ">";
+
+                    return stream.str();
+                });
+
+        py::class_<RuntimeParameters, std::shared_ptr<RuntimeParameters>>(parametersModule, "RuntimeParameters")
+                .def(py::init())
+                .def_readwrite("number_of_timesteps", &RuntimeParameters::numberOfTimeSteps)
+                .def_readwrite("timestep_log_interval", &RuntimeParameters::timeStepLogInterval)
+                .def_readwrite("number_of_threads", &RuntimeParameters::numberOfThreads);
+
+    }
+}
\ No newline at end of file
diff --git a/pythonbindings/src/cpu/submodules/writer.cpp b/pythonbindings/src/cpu/submodules/writer.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..d5ec527a27caf63d9a3066c51e1f675b307fe0b2
--- /dev/null
+++ b/pythonbindings/src/cpu/submodules/writer.cpp
@@ -0,0 +1,21 @@
+#include <pybind11/pybind11.h>
+#include <simulationconfig/WriterConfiguration.h>
+
+namespace writer
+{
+    namespace py = pybind11;
+
+    void makeModule(py::module_ &parentModule)
+    {
+        py::module writerModule = parentModule.def_submodule("writer");
+
+        py::enum_<OutputFormat>(writerModule, "OutputFormat")
+                .value("ASCII", OutputFormat::ASCII)
+                .value("BINARY", OutputFormat::BINARY);
+
+        py::class_<WriterConfiguration>(writerModule, "Writer")
+                .def(py::init())
+                .def_readwrite("output_path", &WriterConfiguration::outputPath)
+                .def_readwrite("output_format", &WriterConfiguration::outputFormat);
+    }
+}
\ No newline at end of file
diff --git a/pythonbindings/src/gpu/gpu.cpp b/pythonbindings/src/gpu/gpu.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..1dd960dbb3ff9c9ef21cecb36e5df90e74360726
--- /dev/null
+++ b/pythonbindings/src/gpu/gpu.cpp
@@ -0,0 +1,40 @@
+#include <pybind11/pybind11.h>
+#include "submodules/actuator_line.cpp"
+#include "submodules/pre_collision_interactor.cpp"
+#include "submodules/simulation.cpp"
+#include "submodules/parameter.cpp"
+#include "submodules/boundary_conditions.cpp"
+#include "submodules/communicator.cpp"
+#include "submodules/grid_builder.cpp"
+#include "submodules/cuda_memory_manager.cpp"
+#include "submodules/grid_provider.cpp"
+#include "submodules/probes.cpp"
+#include "submodules/kernel_factory.cpp"
+#include "submodules/pre_processor_factory.cpp"
+#include "submodules/file_writer.cpp"
+#include "submodules/grid_generator.cpp"
+
+namespace gpu
+{
+    namespace py = pybind11;
+
+    py::module makeModule(py::module_ &parentModule)
+    {
+        py::module gpuModule = parentModule.def_submodule("gpu");
+        simulation::makeModule(gpuModule);
+        parameter::makeModule(gpuModule);
+        pre_collision_interactor::makeModule(gpuModule);
+        actuator_line::makeModule(gpuModule);
+        boundary_conditions::makeModule(gpuModule);
+        communicator::makeModule(gpuModule); 
+        grid_builder::makeModule(gpuModule);
+        cuda_memory_manager::makeModule(gpuModule);
+        grid_provider::makeModule(gpuModule);
+        probes::makeModule(gpuModule);
+        kernel_factory::makeModule(gpuModule);
+        pre_processor_factory::makeModule(gpuModule);
+        file_writer::makeModule(gpuModule);
+        grid_generator::makeModule(gpuModule);
+        return gpuModule;
+    }
+}
\ No newline at end of file
diff --git a/pythonbindings/src/gpu/submodules/actuator_line.cpp b/pythonbindings/src/gpu/submodules/actuator_line.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..a1792c3de9fede5eaba206cc9ff6e7b29fdfbdf9
--- /dev/null
+++ b/pythonbindings/src/gpu/submodules/actuator_line.cpp
@@ -0,0 +1,71 @@
+#include <pybind11/pybind11.h>
+#include <pybind11/stl.h>
+#include <pybind11/numpy.h>
+#include <gpu/VirtualFluids_GPU/PreCollisionInteractor/ActuatorLine.h>
+#include <gpu/VirtualFluids_GPU/PreCollisionInteractor/PreCollisionInteractor.h>
+class PyActuatorLine : public ActuatorLine 
+{
+public:
+    using ActuatorLine::ActuatorLine; // Inherit constructors
+    void calcBladeForces() override 
+    { 
+        PYBIND11_OVERRIDE_NAME(void, ActuatorLine, "calc_blade_forces", calcBladeForces,); 
+    }
+};
+namespace actuator_line
+{
+    namespace py = pybind11;
+
+    void makeModule(py::module_ &parentModule)
+    {
+        using arr = py::array_t<float, py::array::c_style>;
+        
+        py::class_<ActuatorLine, PreCollisionInteractor, PyActuatorLine, std::shared_ptr<ActuatorLine>>(parentModule, "ActuatorLine", py::dynamic_attr())
+        .def(py::init<  const uint,
+                        const real,
+                        const uint,
+                        const real,
+                        real, real, real,
+                        const real,
+                        int,
+                        const real,
+                        const real>(), 
+                        "n_blades", 
+                        "density", 
+                        "n_blade_nodes", 
+                        "epsilon",
+                        "turbine_pos_x", "turbine_pos_y", "turbine_pos_z", 
+                        "diameter", 
+                        "level", 
+                        "delta_t", 
+                        "delta_x")
+        .def_property("omega", &ActuatorLine::getOmega, &ActuatorLine::setOmega)
+        .def_property("azimuth", &ActuatorLine::getAzimuth, &ActuatorLine::getAzimuth)
+        .def_property_readonly("n_blades", &ActuatorLine::getNBlades)
+        .def_property_readonly("n_blade_nodes", &ActuatorLine::getNBladeNodes)
+        .def_property_readonly("n_nodes", &ActuatorLine::getNumberOfNodes)
+        .def_property_readonly("n_indices", &ActuatorLine::getNumberOfIndices)
+        .def_property_readonly("density", &ActuatorLine::getDensity)
+        .def_property_readonly("position_x", &ActuatorLine::getPositionX)
+        .def_property_readonly("position_y", &ActuatorLine::getPositionY)
+        .def_property_readonly("position_z", &ActuatorLine::getPositionZ)
+        .def_property_readonly("position", [](ActuatorLine& al){ real position[3] = {al.getPositionX(), al.getPositionY(), al.getPositionZ()}; return arr(3, position); } )
+        .def("get_radii", [](ActuatorLine& al){ return arr(al.getNBladeNodes(), al.getBladeRadii()); } )
+        .def("get_blade_coords_x", [](ActuatorLine& al){ return arr({al.getNBlades(), al.getNBladeNodes()}, al.getBladeCoordsX()); } )
+        .def("get_blade_coords_y", [](ActuatorLine& al){ return arr({al.getNBlades(), al.getNBladeNodes()}, al.getBladeCoordsY()); } )
+        .def("get_blade_coords_z", [](ActuatorLine& al){ return arr({al.getNBlades(), al.getNBladeNodes()}, al.getBladeCoordsZ()); } )        
+        .def("get_blade_velocities_x", [](ActuatorLine& al){ return arr({al.getNBlades(), al.getNBladeNodes()}, al.getBladeVelocitiesX()); } )
+        .def("get_blade_velocities_y", [](ActuatorLine& al){ return arr({al.getNBlades(), al.getNBladeNodes()}, al.getBladeVelocitiesY()); } )
+        .def("get_blade_velocities_z", [](ActuatorLine& al){ return arr({al.getNBlades(), al.getNBladeNodes()}, al.getBladeVelocitiesZ()); } )
+        .def("get_blade_forces_x", [](ActuatorLine& al){ return arr({al.getNBlades(), al.getNBladeNodes()}, al.getBladeForcesX()); } )
+        .def("get_blade_forces_y", [](ActuatorLine& al){ return arr({al.getNBlades(), al.getNBladeNodes()}, al.getBladeForcesY()); } )
+        .def("get_blade_forces_z", [](ActuatorLine& al){ return arr({al.getNBlades(), al.getNBladeNodes()}, al.getBladeForcesZ()); } )
+        .def("set_blade_coords", [](ActuatorLine& al, arr coordsX, arr coordsY, arr coordsZ){ 
+            al.setBladeCoords(static_cast<float *>(coordsX.request().ptr), static_cast<float *>(coordsY.request().ptr), static_cast<float *>(coordsZ.request().ptr)); } )
+        .def("set_blade_velocities", [](ActuatorLine& al, arr velocitiesX, arr velocitiesY, arr velocitiesZ){ 
+            al.setBladeVelocities(static_cast<float *>(velocitiesX.request().ptr), static_cast<float *>(velocitiesY.request().ptr), static_cast<float *>(velocitiesZ.request().ptr)); } )
+        .def("set_blade_forces", [](ActuatorLine& al, arr forcesX, arr forcesY, arr forcesZ){ 
+            al.setBladeForces(static_cast<float *>(forcesX.request().ptr), static_cast<float *>(forcesY.request().ptr), static_cast<float *>(forcesZ.request().ptr)); } )
+        .def("calc_blade_forces", &ActuatorLine::calcBladeForces);
+    }
+}
\ No newline at end of file
diff --git a/pythonbindings/src/gpu/submodules/boundary_conditions.cpp b/pythonbindings/src/gpu/submodules/boundary_conditions.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..8f941a8705c225275d25291205ebdaeef8de5c9e
--- /dev/null
+++ b/pythonbindings/src/gpu/submodules/boundary_conditions.cpp
@@ -0,0 +1,20 @@
+#include <pybind11/pybind11.h>
+#include <gpu/GridGenerator/grid/BoundaryConditions/Side.h>
+
+namespace boundary_conditions
+{
+    namespace py = pybind11;
+
+    void makeModule(py::module_ &parentModule)
+    {
+        py::enum_<SideType>(parentModule, "SideType")
+        .value("MX", SideType::MX)
+        .value("PX", SideType::PX)
+        .value("MY", SideType::MY)
+        .value("PY", SideType::PY)
+        .value("MZ", SideType::MZ)
+        .value("PZ", SideType::PZ)
+        .value("GEOMETRY", SideType::GEOMETRY)
+        .export_values();
+    }
+}
\ No newline at end of file
diff --git a/pythonbindings/src/gpu/submodules/communicator.cpp b/pythonbindings/src/gpu/submodules/communicator.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..edb36e2c2f774903590a16a0b406c721662827b1
--- /dev/null
+++ b/pythonbindings/src/gpu/submodules/communicator.cpp
@@ -0,0 +1,15 @@
+#include <pybind11/pybind11.h>
+#include <gpu/VirtualFluids_GPU/Communication/Communicator.h>
+
+namespace communicator
+{
+    namespace py = pybind11;
+
+    void makeModule(py::module_ &parentModule)
+    {
+        py::class_<vf::gpu::Communicator, std::unique_ptr<vf::gpu::Communicator, py::nodelete>>(parentModule, "Communicator")
+        .def("get_instance", &vf::gpu::Communicator::getInstance, py::return_value_policy::reference)
+        .def("get_number_of_process", &vf::gpu::Communicator::getNummberOfProcess)
+        .def("get_pid", &vf::gpu::Communicator::getPID);
+    }
+}
\ No newline at end of file
diff --git a/pythonbindings/src/gpu/submodules/cuda_memory_manager.cpp b/pythonbindings/src/gpu/submodules/cuda_memory_manager.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..f337aeb306fd1836e5bc2962e85f45d593185836
--- /dev/null
+++ b/pythonbindings/src/gpu/submodules/cuda_memory_manager.cpp
@@ -0,0 +1,15 @@
+#include <pybind11/pybind11.h>
+#include <gpu/VirtualFluids_GPU/GPU/CudaMemoryManager.h>
+#include <gpu/VirtualFluids_GPU/Parameter/Parameter.h>
+
+
+namespace cuda_memory_manager
+{
+    namespace py = pybind11;
+
+    void makeModule(py::module_ &parentModule)
+    {
+        py::class_<CudaMemoryManager, std::shared_ptr<CudaMemoryManager>>(parentModule, "CudaMemoryManager")
+        .def("make", &CudaMemoryManager::make, py::return_value_policy::reference);
+    }
+}
\ No newline at end of file
diff --git a/pythonbindings/src/gpu/submodules/file_writer.cpp b/pythonbindings/src/gpu/submodules/file_writer.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..2ad90fe7381be215b2d257b5be99caf25db1e0ae
--- /dev/null
+++ b/pythonbindings/src/gpu/submodules/file_writer.cpp
@@ -0,0 +1,17 @@
+#include <pybind11/pybind11.h>
+#include <gpu/VirtualFluids_GPU/Output/FileWriter.h>
+#include <gpu/VirtualFluids_GPU/Output/DataWriter.h>
+
+
+namespace file_writer
+{
+    namespace py = pybind11;
+
+    void makeModule(py::module_ &parentModule)
+    {
+        py::class_<DataWriter, std::shared_ptr<DataWriter>>(parentModule, "_DataWriter");
+
+        py::class_<FileWriter, DataWriter, std::shared_ptr<FileWriter>>(parentModule, "FileWriter")
+        .def(py::init<>());
+    }
+}
\ No newline at end of file
diff --git a/pythonbindings/src/gpu/submodules/grid_builder.cpp b/pythonbindings/src/gpu/submodules/grid_builder.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..09241f71cbe9cd013600c34e4d0a07e53a1bc79f
--- /dev/null
+++ b/pythonbindings/src/gpu/submodules/grid_builder.cpp
@@ -0,0 +1,36 @@
+#include <pybind11/pybind11.h>
+#include "gpu/GridGenerator/grid/GridBuilder/MultipleGridBuilder.h"
+#include "gpu/GridGenerator/grid/GridBuilder/GridBuilder.h"
+#include "gpu/GridGenerator/grid/GridBuilder/LevelGridBuilder.h"
+#include "gpu/GridGenerator/geometries/Object.h"
+#include "gpu/GridGenerator/grid/BoundaryConditions/Side.h"
+
+namespace grid_builder
+{
+    namespace py = pybind11;
+
+    void makeModule(py::module_ &parentModule)
+    {        
+        py::class_<GridBuilder, std::shared_ptr<GridBuilder>>(parentModule, "GridBuilder")
+        .def("get_number_of_grid_levels", &GridBuilder::getNumberOfGridLevels)
+        .def("get_grid", &GridBuilder::getGrid);
+
+        py::class_<LevelGridBuilder, GridBuilder, std::shared_ptr<LevelGridBuilder>>(parentModule, "LevelGridBuilder")
+        .def("get_grid", py::overload_cast<int, int>(&LevelGridBuilder::getGrid))
+        .def("set_slip_boundary_condition", &LevelGridBuilder::setSlipBoundaryCondition)
+        .def("set_velocity_boundary_condition", &LevelGridBuilder::setVelocityBoundaryCondition)
+        .def("set_pressure_boundary_condition", &LevelGridBuilder::setPressureBoundaryCondition)
+        .def("set_periodic_boundary_condition", &LevelGridBuilder::setPeriodicBoundaryCondition)
+        .def("set_no_slip_boundary_condition", &LevelGridBuilder::setNoSlipBoundaryCondition);
+
+        py::class_<MultipleGridBuilder, LevelGridBuilder, std::shared_ptr<MultipleGridBuilder>>(parentModule, "MultipleGridBuilder")
+        .def("make_shared", &MultipleGridBuilder::makeShared, py::return_value_policy::reference)
+        .def("add_coarse_grid", &MultipleGridBuilder::addCoarseGrid)
+        .def("add_grid", py::overload_cast<Object*>(&MultipleGridBuilder::addGrid))
+        .def("add_grid", py::overload_cast<Object*, uint>(&MultipleGridBuilder::addGrid))
+        .def("add_geometry", py::overload_cast<Object*>(&MultipleGridBuilder::addGeometry))
+        .def("add_geometry", py::overload_cast<Object*, uint>(&MultipleGridBuilder::addGeometry))
+        .def("get_number_of_levels", &MultipleGridBuilder::getNumberOfLevels)
+        .def("build_grids", &MultipleGridBuilder::buildGrids);
+    }
+}
\ No newline at end of file
diff --git a/pythonbindings/src/gpu/submodules/grid_generator.cpp b/pythonbindings/src/gpu/submodules/grid_generator.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..b9babe1d6b45f69d516a4cd57377eaa6c1b37149
--- /dev/null
+++ b/pythonbindings/src/gpu/submodules/grid_generator.cpp
@@ -0,0 +1,38 @@
+#include <pybind11/pybind11.h>
+#include "gpu/GridGenerator/geometries/Object.h"
+#include "gpu/GridGenerator/geometries/BoundingBox/BoundingBox.h"
+#include "gpu/GridGenerator/geometries/Conglomerate/Conglomerate.h"
+#include "gpu/GridGenerator/geometries/Cuboid/Cuboid.h"
+#include "gpu/GridGenerator/geometries/Sphere/Sphere.h"
+#include "gpu/GridGenerator/geometries/TriangularMesh/TriangularMesh.h"
+
+namespace grid_generator
+{
+    namespace py = pybind11;
+    py::module makeModule(py::module_ &parentModule)
+    {  
+        py::module gridGeneratorModule = parentModule.def_submodule("grid_generator");
+
+        py::class_<BoundingBox>(gridGeneratorModule, "BoundingBox")
+        .def(py::init<real, real, real, real, real, real>(),"min_x","max_x","min_y","max_y","min_z","max_z");
+
+        py::class_<Object, std::shared_ptr<Object>>(gridGeneratorModule, "Object");
+        
+        py::class_<Conglomerate, Object, std::shared_ptr<Conglomerate>>(gridGeneratorModule, "Conglomerate")
+        .def("make_shared", &Conglomerate::makeShared, py::return_value_policy::reference)
+        .def("add", &Conglomerate::add)
+        .def("subtract", &Conglomerate::subtract);
+
+        py::class_<Cuboid, Object, std::shared_ptr<Cuboid>>(gridGeneratorModule, "Cuboid")
+        .def(py::init<const double&, const double&, const double&, const double&, const double&, const double&>(),
+                        "min_x1", "min_x2", "min_x3", "max_x1", "max_x2", "max_x3");
+
+        py::class_<Sphere, Object, std::shared_ptr<Sphere>>(gridGeneratorModule, "Sphere")
+        .def("make_shared", &Sphere::makeShared, py::return_value_policy::reference);
+
+        py::class_<TriangularMesh, Object, std::shared_ptr<TriangularMesh>>(gridGeneratorModule, "TriangularMesh")
+        .def("make", &TriangularMesh::make, py::return_value_policy::reference);
+
+        return gridGeneratorModule;
+    }
+}
diff --git a/pythonbindings/src/gpu/submodules/grid_provider.cpp b/pythonbindings/src/gpu/submodules/grid_provider.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..5a5514dd8b14d24f4818740e115c8504bc973726
--- /dev/null
+++ b/pythonbindings/src/gpu/submodules/grid_provider.cpp
@@ -0,0 +1,16 @@
+#include <pybind11/pybind11.h>
+#include "gpu/VirtualFluids_GPU/DataStructureInitializer/GridProvider.h"
+#include <gpu/VirtualFluids_GPU/GPU/CudaMemoryManager.h>
+#include <gpu/VirtualFluids_GPU/Parameter/Parameter.h>
+#include "gpu/GridGenerator/grid/GridBuilder/GridBuilder.h"
+
+namespace grid_provider
+{
+    namespace py = pybind11;
+
+    void makeModule(py::module_ &parentModule)
+    {
+        py::class_<GridProvider, std::shared_ptr<GridProvider>>(parentModule, "GridProvider")
+        .def("make_grid_generator", &GridProvider::makeGridGenerator, py::return_value_policy::reference);
+    }
+}
\ No newline at end of file
diff --git a/pythonbindings/src/gpu/submodules/kernel_factory.cpp b/pythonbindings/src/gpu/submodules/kernel_factory.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..af710948aa75900ba9f4051360ec26dd8510118a
--- /dev/null
+++ b/pythonbindings/src/gpu/submodules/kernel_factory.cpp
@@ -0,0 +1,16 @@
+#include <pybind11/pybind11.h>
+#include <gpu/VirtualFluids_GPU/Kernel/Utilities/KernelFactory/KernelFactoryImp.h>
+#include <gpu/VirtualFluids_GPU/Kernel/Utilities/KernelFactory/KernelFactory.h>
+
+namespace kernel_factory
+{
+    namespace py = pybind11;
+
+    void makeModule(py::module_ &parentModule)
+    {
+        py::class_<KernelFactory, std::shared_ptr<KernelFactory>>(parentModule, "_KernelFactory");
+        
+        py::class_<KernelFactoryImp, KernelFactory, std::shared_ptr<KernelFactoryImp>>(parentModule, "KernelFactory")
+        .def("get_instance", &KernelFactoryImp::getInstance, py::return_value_policy::reference);
+    }
+}
\ No newline at end of file
diff --git a/pythonbindings/src/gpu/submodules/parameter.cpp b/pythonbindings/src/gpu/submodules/parameter.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..2d394da657cc7f45f1b4e7491b0d69774be7f0f8
--- /dev/null
+++ b/pythonbindings/src/gpu/submodules/parameter.cpp
@@ -0,0 +1,69 @@
+#include <pybind11/pybind11.h>
+#include <pybind11/functional.h>
+#include <pybind11/stl.h>
+#include <gpu/VirtualFluids_GPU/Parameter/Parameter.h>
+#include <basics/config/ConfigurationFile.h>
+#include <gpu/VirtualFluids_GPU/PreCollisionInteractor/PreCollisionInteractor.h>
+
+namespace parameter
+{
+    namespace py = pybind11;
+
+    void makeModule(py::module_ &parentModule)
+    {
+        py::class_<Parameter, std::shared_ptr<Parameter>>(parentModule, "Parameter")
+        .def(py::init<
+                const vf::basics::ConfigurationFile&, 
+                int,
+                int
+                >(),
+                "config_data",
+                "number_of_processes",
+                "my_ID")
+        .def("set_forcing", &Parameter::setForcing)
+        .def("set_diff_on", &Parameter::setDiffOn)
+        .def("set_comp_on", &Parameter::setCompOn)
+        .def("set_max_level", &Parameter::setMaxLevel)
+        .def("set_t_end", &Parameter::setTEnd)
+        .def("set_t_out", &Parameter::setTOut)
+        .def("set_t_start_out", &Parameter::setTStartOut)
+        .def("set_timestep_of_coarse_level", &Parameter::setTimestepOfCoarseLevel)
+        .def("set_output_path", &Parameter::setOutputPath)
+        .def("set_output_prefix", &Parameter::setOutputPrefix)
+        .def("set_f_name", &Parameter::setFName)
+        .def("set_print_files", &Parameter::setPrintFiles)
+        .def("set_temperature_init", &Parameter::setTemperatureInit)
+        .def("set_temperature_BC", &Parameter::setTemperatureBC)
+        .def("set_viscosity", &Parameter::setViscosity)
+        .def("set_velocity", &Parameter::setVelocity)
+        .def("set_viscosity_ratio", &Parameter::setViscosityRatio)
+        .def("set_velocity_ratio", &Parameter::setVelocityRatio)
+        .def("set_density_ratio", &Parameter::setDensityRatio)
+        .def("set_devices", &Parameter::setDevices)
+        .def("set_is_body_force", &Parameter::setIsBodyForce)
+        .def("set_main_kernel", &Parameter::setMainKernel)
+        .def("set_AD_kernel", &Parameter::setADKernel)
+        .def("set_initial_condition", [](Parameter &para, std::function<std::vector<float>(real, real, real)> &init_func)
+        {
+            para.setInitialCondition([init_func](real coordX, real coordY, real coordZ, real& rho, real& vx, real& vy, real& vz)
+            {   
+                std::vector<float> values = init_func(coordX, coordY, coordZ);
+                rho = values[0];
+                vx = values[1];
+                vy = values[2];
+                vz = values[3];
+            });
+        })
+        .def("add_actuator", &Parameter::addActuator)
+        .def("add_probe", &Parameter::addProbe)
+        .def("get_output_path", &Parameter::getOutputPath)
+        .def("get_output_prefix", &Parameter::getOutputPrefix)
+        .def("get_velocity", &Parameter::getVelocity)
+        .def("get_viscosity", &Parameter::getViscosity)
+        .def("get_velocity_ratio", &Parameter::getVelocityRatio)
+        .def("get_viscosity_ratio", &Parameter::getViscosityRatio)
+        .def("get_density_ratio", &Parameter::getDensityRatio)
+        .def("get_force_ratio", &Parameter::getForceRatio)
+        ;
+    }
+}
\ No newline at end of file
diff --git a/pythonbindings/src/gpu/submodules/pre_collision_interactor.cpp b/pythonbindings/src/gpu/submodules/pre_collision_interactor.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..362ee1a8ce6112cfa9543f1b254e10f3e35822a1
--- /dev/null
+++ b/pythonbindings/src/gpu/submodules/pre_collision_interactor.cpp
@@ -0,0 +1,12 @@
+#include <pybind11/pybind11.h>
+#include <gpu/VirtualFluids_GPU/PreCollisionInteractor/PreCollisionInteractor.h>
+
+namespace pre_collision_interactor
+{
+    namespace py = pybind11;
+
+    void makeModule(py::module_ &parentModule)
+    {
+        py::class_<PreCollisionInteractor, std::shared_ptr<PreCollisionInteractor>>(parentModule, "PreCollisionInteractor");
+    }
+}
\ No newline at end of file
diff --git a/pythonbindings/src/gpu/submodules/pre_processor_factory.cpp b/pythonbindings/src/gpu/submodules/pre_processor_factory.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..b76ae285b413594847f6672e7f3fc0e656da3cec
--- /dev/null
+++ b/pythonbindings/src/gpu/submodules/pre_processor_factory.cpp
@@ -0,0 +1,16 @@
+#include <pybind11/pybind11.h>
+#include <gpu/VirtualFluids_GPU/PreProcessor/PreProcessorFactory/PreProcessorFactory.h>
+#include <gpu/VirtualFluids_GPU/PreProcessor/PreProcessorFactory/PreProcessorFactoryImp.h>
+
+namespace pre_processor_factory
+{
+    namespace py = pybind11;
+
+    void makeModule(py::module_ &parentModule)
+    {
+        py::class_<PreProcessorFactory, std::shared_ptr<PreProcessorFactory>>(parentModule, "_PreProcessorFactory");
+
+        py::class_<PreProcessorFactoryImp, PreProcessorFactory, std::shared_ptr<PreProcessorFactoryImp>>(parentModule, "PreProcessorFactory")
+        .def("get_instance", &PreProcessorFactoryImp::getInstance, py::return_value_policy::reference);
+    }
+}
\ No newline at end of file
diff --git a/pythonbindings/src/gpu/submodules/probes.cpp b/pythonbindings/src/gpu/submodules/probes.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..9b751a513d23a8a17e4e9981ababc720fc7346b0
--- /dev/null
+++ b/pythonbindings/src/gpu/submodules/probes.cpp
@@ -0,0 +1,55 @@
+#include <pybind11/pybind11.h>
+#include <pybind11/stl.h>
+#include <gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/Probe.h>
+#include <gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/PointProbe.h>
+#include <gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/PlaneProbe.h>
+#include <gpu/VirtualFluids_GPU/PreCollisionInteractor/PreCollisionInteractor.h>
+
+namespace probes
+{
+    namespace py = pybind11;
+
+    py::module makeModule(py::module_ &parentModule)
+    {
+        py::module probeModule = parentModule.def_submodule("probes");
+
+        py::enum_<PostProcessingVariable>(probeModule, "PostProcessingVariable")
+        .value("Instantaneous", PostProcessingVariable::Instantaneous)
+        .value("Means", PostProcessingVariable::Means)
+        .value("Variances", PostProcessingVariable::Variances);
+
+        py::class_<Probe, PreCollisionInteractor, std::shared_ptr<Probe>>(probeModule, "Probe")
+        .def("add_post_processing_variable", &Probe::addPostProcessingVariable);
+
+        py::class_<PointProbe, Probe, std::shared_ptr<PointProbe>>(probeModule, "PointProbe")
+        .def(py::init<
+                        const std::string,
+                        const std::string,
+                        uint,
+                        uint, 
+                        uint>(), 
+                        "probe_name",
+                        "output_path"
+                        "t_start_avg",
+                        "t_start_out",
+                        "t_out")
+        .def("add_probe_points_from_list", &PointProbe::addProbePointsFromList)
+        .def("add_probe_points_from_x_normal_plane", &PointProbe::addProbePointsFromXNormalPlane);
+
+        py::class_<PlaneProbe, Probe, std::shared_ptr<PlaneProbe>>(probeModule, "PlaneProbe")
+        .def(py::init<
+                        const std::string,
+                        const std::string,
+                        uint,
+                        uint, 
+                        uint>(), 
+                        "probe_name",
+                        "output_path"
+                        "t_start_avg",
+                        "t_start_out",
+                        "t_out")
+        .def("set_probe_plane", &PlaneProbe::setProbePlane);
+
+        return probeModule;
+    }
+}
\ No newline at end of file
diff --git a/pythonbindings/src/gpu/submodules/simulation.cpp b/pythonbindings/src/gpu/submodules/simulation.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..9683e8768417ad2422fb1a53ef6a428543bceffc
--- /dev/null
+++ b/pythonbindings/src/gpu/submodules/simulation.cpp
@@ -0,0 +1,25 @@
+#include <pybind11/pybind11.h>
+#include <gpu/VirtualFluids_GPU/LBM/Simulation.h>
+#include <gpu/VirtualFluids_GPU/Communication/Communicator.h>
+#include <gpu/VirtualFluids_GPU/Kernel/Utilities/KernelFactory/KernelFactory.h>
+#include <gpu/VirtualFluids_GPU/PreProcessor/PreProcessorFactory/PreProcessorFactory.h>
+#include <gpu/VirtualFluids_GPU/DataStructureInitializer/GridProvider.h>
+#include <gpu/VirtualFluids_GPU/Parameter/Parameter.h>
+#include <gpu/VirtualFluids_GPU/GPU/CudaMemoryManager.h>
+#include <gpu/VirtualFluids_GPU/DataStructureInitializer/GridProvider.h>
+#include <gpu/VirtualFluids_GPU/Output/DataWriter.h>
+
+namespace simulation
+{
+    namespace py = pybind11;
+
+    void makeModule(py::module_ &parentModule)
+    {
+        py::class_<Simulation>(parentModule, "Simulation")
+        .def(py::init<vf::gpu::Communicator&>(), "communicator")
+        .def("set_factories", &Simulation::setFactories)
+        .def("init", &Simulation::init)
+        .def("run", &Simulation::run)
+        .def("free", &Simulation::free);
+    }
+}
\ No newline at end of file
diff --git a/pythonbindings/src/lbm/lbm.cpp b/pythonbindings/src/lbm/lbm.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..441b9ff372f4e4513fee58c4a8a1cd78d38582dd
--- /dev/null
+++ b/pythonbindings/src/lbm/lbm.cpp
@@ -0,0 +1,13 @@
+#include <pybind11/pybind11.h>
+
+namespace lbm
+{
+    namespace py = pybind11;
+
+    py::module makeModule(py::module_ &parentModule)
+    {
+        py::module lbmModule = parentModule.def_submodule("lbm");
+
+        return lbmModule;
+    }
+}
\ No newline at end of file
diff --git a/pythonbindings/src/logger/logger.cpp b/pythonbindings/src/logger/logger.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..82ad3d92760ae38c0eb62b16be726e4eeaca08ac
--- /dev/null
+++ b/pythonbindings/src/logger/logger.cpp
@@ -0,0 +1,25 @@
+#include <pybind11/pybind11.h>
+#include <logger/Logger.h>
+
+namespace logging
+{
+    namespace py = pybind11;
+
+    py::module makeModule(py::module_ &parentModule)
+    {
+        py::module loggerModule = parentModule.def_submodule("logger");
+
+        py::class_<vf::logging::Logger>(loggerModule, "Logger")
+        .def("initialize_logger", &vf::logging::Logger::initalizeLogger)
+        .def("change_log_path", &vf::logging::Logger::changeLogPath);
+
+        // use f-strings (f"text {float}") in python for compounded messages
+        loggerModule.def("vf_log_trace", [](std::string arg){ VF_LOG_TRACE(arg); });        
+        loggerModule.def("vf_log_debug", [](std::string arg){ VF_LOG_DEBUG(arg); });        
+        loggerModule.def("vf_log_info", [](std::string arg){ VF_LOG_INFO(arg); });        
+        loggerModule.def("vf_log_warning", [](std::string arg){ VF_LOG_WARNING(arg); });        
+        loggerModule.def("vf_log_critical", [](std::string arg){ VF_LOG_CRITICAL(arg); });        
+
+        return loggerModule;
+    }
+} // namespace logging
diff --git a/src/cpu/pythonbindings/src/muParser.cpp b/pythonbindings/src/muParser.cpp
similarity index 100%
rename from src/cpu/pythonbindings/src/muParser.cpp
rename to pythonbindings/src/muParser.cpp
diff --git a/setup.py b/setup.py
index ffe6663be9561a209945b91bb396254b703ae892..b26e1c13d09447d17f8e9fd6e2cd0d0671595bf3 100644
--- a/setup.py
+++ b/setup.py
@@ -6,23 +6,66 @@ import subprocess
 
 from setuptools import setup, Extension
 from setuptools.command.build_ext import build_ext
+from setuptools.command.install import install
+from setuptools.command.develop import develop
 from distutils.version import LooseVersion
 
+"""
+Install python wrapper of virtual fluids
+Install GPU backend with option --GPU
+(pass to pip via --install-option="--GPU")
+"""
+
 vf_cmake_args = [
     "-DBUILD_VF_PYTHON_BINDINGS=ON",
-    "-DBUILD_VF_DOUBLE_ACCURACY=ON",
     "-DCMAKE_CXX_COMPILER_LAUNCHER=ccache",
     "-DCMAKE_CUDA_COMPILER_LAUNCHER=ccache",
     "-DCMAKE_C_COMPILER_LAUNCHER=ccache",
-    "-DBUILD_VF_CPU:BOOL=ON",
-    "-DBUILD_VF_GPU:BOOL=OFF",
-    "-DUSE_METIS=ON",
-    "-DUSE_MPI=ON",
     "-DBUILD_SHARED_LIBS=OFF",
-    "-DBUILD_VF_UNIT_TESTS:BOOL=ON",
     "-DBUILD_WARNINGS_AS_ERRORS=OFF"
 ]
 
+vf_cpu_cmake_args = [
+    "-DBUILD_VF_DOUBLE_ACCURACY=ON",
+    "-DBUILD_VF_CPU:BOOL=ON",
+    "-DBUILD_VF_UNIT_TESTS:BOOL=ON",
+    "-DUSE_METIS=ON",
+    "-DUSE_MPI=ON"
+]
+
+vf_gpu_cmake_args = [
+    "-DBUILD_VF_DOUBLE_ACCURACY=OFF",
+    "-DBUILD_VF_GPU:BOOL=ON",
+    "-DBUILD_VF_UNIT_TESTS:BOOL=OFF",
+]
+
+GPU = False
+
+class CommandMixin:
+    user_options = [
+        ('GPU', None, 'compile pyfluids with GPU backend'),
+    ]
+
+    def initialize_options(self):
+        super().initialize_options()
+        self.GPU = False
+
+    def finalize_options(self):
+        super().finalize_options()
+
+    def run(self):
+        global GPU
+        GPU = GPU or self.GPU
+        super().run()
+
+
+class InstallCommand(CommandMixin, install):
+    user_options = getattr(install, 'user_options', []) + CommandMixin.user_options
+
+
+class DevelopCommand(CommandMixin, develop):
+    user_options = getattr(develop, 'user_options', []) + CommandMixin.user_options
+
 
 class CMakeExtension(Extension):
     def __init__(self, name, sourcedir=''):
@@ -30,8 +73,11 @@ class CMakeExtension(Extension):
         self.sourcedir = os.path.abspath(sourcedir)
 
 
-class CMakeBuild(build_ext):
+class CMakeBuild(CommandMixin, build_ext):
+    user_options = getattr(build_ext, 'user_options', []) + CommandMixin.user_options
+
     def run(self):
+        super().run()
         try:
             out = subprocess.check_output(['cmake', '--version'])
         except OSError:
@@ -68,12 +114,16 @@ class CMakeBuild(build_ext):
             build_args += ['--', '-j2']
 
         cmake_args.extend(vf_cmake_args)
+        cmake_args.extend(vf_gpu_cmake_args if GPU else vf_cpu_cmake_args)
 
         env = os.environ.copy()
         env['CXXFLAGS'] = '{} -DVERSION_INFO=\\"{}\\"'.format(env.get('CXXFLAGS', ''),
                                                               self.distribution.get_version())
         if not os.path.exists(self.build_temp):
             os.makedirs(self.build_temp)
+        cmake_cache_file = self.build_temp+"/CMakeCache.txt"
+        if os.path.exists(cmake_cache_file):
+            os.remove(cmake_cache_file)
         subprocess.check_call(['cmake', ext.sourcedir] + cmake_args, cwd=self.build_temp, env=env)
         subprocess.check_call(['cmake', '--build', '.'] + build_args, cwd=self.build_temp)
 
@@ -82,6 +132,6 @@ setup(
     name='pyfluids',
     version='0.0.1',
     ext_modules=[CMakeExtension('pyfluids')],
-    cmdclass=dict(build_ext=CMakeBuild),
+    cmdclass={"install": InstallCommand, "develop": DevelopCommand, "build_ext": CMakeBuild},
     zip_safe=False,
 )
diff --git a/src/gpu/GridGenerator/grid/GridBuilder/GridBuilder.h b/src/gpu/GridGenerator/grid/GridBuilder/GridBuilder.h
index 57290025c3f6e48554e1dafe9bd101ae237b3288..6ab8efc88d02e1032c2d26c756e84d4fa33359ac 100644
--- a/src/gpu/GridGenerator/grid/GridBuilder/GridBuilder.h
+++ b/src/gpu/GridGenerator/grid/GridBuilder/GridBuilder.h
@@ -28,7 +28,7 @@
 //
 //! \file GridBuilder.h
 //! \ingroup grid
-//! \author Soeren Peters, Stephan Lenz, Martin Schönherr
+//! \author Soeren Peters, Stephan Lenz, Martin Sch�nherr
 //=======================================================================================
 #ifndef GridBuilder_H
 #define GridBuilder_H
@@ -37,7 +37,7 @@
 #include <string>
 #include <memory>
 
-#include "GridGenerator/global.h"
+#include "global.h"
 
 #define GEOMQS 6
 #define INLETQS 0
diff --git a/src/gpu/GridGenerator/grid/GridBuilder/MultipleGridBuilder.cpp b/src/gpu/GridGenerator/grid/GridBuilder/MultipleGridBuilder.cpp
index 565a02a2807ef15679bf08fa001ea9a03f78ca4e..c36a8cb70e6bb36dccb9179b9c014e5fb7464a30 100644
--- a/src/gpu/GridGenerator/grid/GridBuilder/MultipleGridBuilder.cpp
+++ b/src/gpu/GridGenerator/grid/GridBuilder/MultipleGridBuilder.cpp
@@ -28,7 +28,7 @@
 //
 //! \file MultipleGridBuilder.cpp
 //! \ingroup grid
-//! \author Soeren Peters, Stephan Lenz, Martin Schönherr
+//! \author Soeren Peters, Stephan Lenz, Martin Sch�nherr
 //=======================================================================================
 #include "MultipleGridBuilder.h"
 
diff --git a/src/gpu/VirtualFluids_GPU/CMakeLists.txt b/src/gpu/VirtualFluids_GPU/CMakeLists.txt
index 116e987664ba14b83054c823ff9cf6505fccf769..14fdadba44069dbb098f8922b208397a609275af 100644
--- a/src/gpu/VirtualFluids_GPU/CMakeLists.txt
+++ b/src/gpu/VirtualFluids_GPU/CMakeLists.txt
@@ -12,7 +12,7 @@ vf_add_library(PUBLIC_LINK basics lbmCuda PRIVATE_LINK ${additional_libraries} G
 #https://stackoverflow.com/questions/6832666/lnk2019-when-including-asio-headers-solution-generated-with-cmake
 #https://stackoverflow.com/questions/27442885/syntax-error-with-stdnumeric-limitsmax
 
-set_target_properties(VirtualFluids_GPU PROPERTIES CUDA_SEPARABLE_COMPILATION ON)
+set_target_properties(VirtualFluids_GPU PROPERTIES CUDA_SEPARABLE_COMPILATION ON POSITION_INDEPENDENT_CODE ON)
 
 
 vf_add_tests()
diff --git a/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.cpp b/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.cpp
index 78f8b7f589430eb303fe4a59df2458da863427fc..1fec35ed1cf86b09c04bf861a3386cab3b35410d 100644
--- a/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.cpp
+++ b/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.cpp
@@ -9,9 +9,6 @@
 #include "Kernel/Kernel.h"
 #include "GPU/TurbulentViscosity.h"
 
-void interactWithActuators(Parameter* para, CudaMemoryManager* cudaManager, int level, unsigned int t);
-void interactWithProbes(Parameter* para, CudaMemoryManager* cudaManager, int level, unsigned int t);
-
 void updateGrid27(Parameter* para, 
                   vf::gpu::Communicator& comm, 
                   CudaMemoryManager* cudaManager, 
diff --git a/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.h b/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.h
index 68288a7fdd34b4b17ed75dd461302f49dcba6f3e..e7e411807c25022957bc0218427b2b367663a23e 100644
--- a/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.h
+++ b/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.h
@@ -40,4 +40,8 @@ extern "C" void coarseToFine(Parameter* para, int level);
 
 extern "C" void calcTurbulentViscosity(Parameter* para, int level);
 
+extern "C" void interactWithActuators(Parameter* para, CudaMemoryManager* cudaManager, int level, unsigned int t);
+
+extern "C" void interactWithProbes(Parameter* para, CudaMemoryManager* cudaManager, int level, unsigned int t);
+
 #endif
diff --git a/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridProvider.h b/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridProvider.h
index 70a2a8ec629809689096ce0b33a197924cf000a2..3f2834e1157f4e7f9c44fb9db8987d1b0e679a5e 100644
--- a/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridProvider.h
+++ b/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridProvider.h
@@ -9,7 +9,7 @@
 #include "PointerDefinitions.h"
 #include "VirtualFluids_GPU_export.h"
 
-#include <GridGenerator/io/SimulationFileWriter/SimulationFileWriter.h>
+#include "gpu/GridGenerator/io/SimulationFileWriter/SimulationFileWriter.h"
 
 class Parameter;
 class GridBuilder;
diff --git a/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/ActuatorLine.cu b/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/ActuatorLine.cu
index a66474d4fdc13617fde27bef04a2ca30cf005281..2d1ee506cc47a4d4cc6b60872f62e1b1cefb88ba 100644
--- a/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/ActuatorLine.cu
+++ b/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/ActuatorLine.cu
@@ -12,6 +12,17 @@
 #include "DataStructureInitializer/GridProvider.h"
 #include "GPU/CudaMemoryManager.h"
 
+__host__ __device__ __inline__ uint calcNode(uint bladeNode, uint nBladeNodes, uint blade, uint nBlades)
+{
+    return bladeNode+blade*nBladeNodes;
+}
+
+__host__ __device__ __inline__ void calcBladeAndBladeNode(uint node, uint& bladeNode, uint nBladeNodes, uint& blade, uint nBlades)
+{
+    blade = node/nBladeNodes;
+    bladeNode = node % nBladeNodes;
+}
+
 __host__ __device__ __inline__ real calcGaussian3D(real posX, real posY, real posZ, real destX, real destY, real destZ, real epsilon)
 {
     real distX = destX-posX;
@@ -21,15 +32,38 @@ __host__ __device__ __inline__ real calcGaussian3D(real posX, real posY, real po
     return pow(epsilon,-3)*pow(vf::lbm::constant::cPi,-1.5f)*exp(-pow(dist/epsilon,2));
 }
 
+__host__ __device__ __inline__ void rotateFromBladeToGlobal(
+                            real& bladeCoordX_BF, real& bladeCoordY_BF, real& bladeCoordZ_BF, 
+                            real& bladeCoordX_GF, real& bladeCoordY_GF, real& bladeCoordZ_GF,
+                            real& azimuth, real& yaw)
+{
+    real tmpX, tmpY, tmpZ;
+
+    rotateAboutX3D(azimuth, bladeCoordX_BF, bladeCoordY_BF, bladeCoordZ_BF, tmpX, tmpY, tmpZ);
+    rotateAboutZ3D(yaw, tmpX, tmpY, tmpZ, bladeCoordX_GF, bladeCoordY_GF, bladeCoordZ_GF);
+
+}
+
+__host__ __device__ __inline__ void rotateFromGlobalToBlade(
+                            real& bladeCoordX_BF, real& bladeCoordY_BF, real& bladeCoordZ_BF, 
+                            real& bladeCoordX_GF, real& bladeCoordY_GF, real& bladeCoordZ_GF,
+                            real& azimuth, real& yaw)
+{
+    real tmpX, tmpY, tmpZ;
+
+    invRotateAboutZ3D(yaw, bladeCoordX_GF, bladeCoordY_GF, bladeCoordZ_GF, tmpX, tmpY, tmpZ);
+    invRotateAboutX3D(azimuth, tmpX, tmpY, tmpZ, bladeCoordX_BF, bladeCoordY_BF, bladeCoordZ_BF);
+}
 
 __global__ void interpolateVelocities(real* gridCoordsX, real* gridCoordsY, real* gridCoordsZ, 
-                                      uint* neighborsX, uint* neighborsY, uint* neighborsZ, 
-                                      uint* neighborsWSB, 
+                                      uint* neighborsX, uint* neighborsY, uint* neighborsZ, uint* neighborsWSB, 
                                       real* vx, real* vy, real* vz, 
-                                      uint numberOfIndices, 
-                                      real* bladeCoordsX, real* bladeCoordsY, real* bladeCoordsZ, 
+                                      real* bladeCoordsX, real* bladeCoordsY, real* bladeCoordsZ,
                                       real* bladeVelocitiesX, real* bladeVelocitiesY, real* bladeVelocitiesZ, 
-                                      uint* bladeIndices, uint numberOfNodes)
+                                      uint nBlades, uint nBladeNodes, 
+                                      real azimuth, real yaw, real omega, 
+                                      real turbPosX, real turbPosZ, real turbPosY,
+                                      uint* bladeIndices, real velocityRatio)
 {
     const uint x = threadIdx.x; 
     const uint y = blockIdx.x;
@@ -40,11 +74,27 @@ __global__ void interpolateVelocities(real* gridCoordsX, real* gridCoordsY, real
 
     const uint node = nx*(ny*z + y) + x;
 
-    if(node>=numberOfNodes) return;
+    uint bladeNode, blade;
+
+    calcBladeAndBladeNode(node, bladeNode, nBladeNodes, blade, nBlades);
+
+    if(node>=nBladeNodes*nBlades) return;
+
+    real bladePosX_BF = bladeCoordsX[node];
+    real bladePosY_BF = bladeCoordsY[node];
+    real bladePosZ_BF = bladeCoordsZ[node];
+
+    real bladePosX_GF, bladePosY_GF, bladePosZ_GF;
 
-    real bladePosX = bladeCoordsX[node];
-    real bladePosY = bladeCoordsY[node];
-    real bladePosZ = bladeCoordsZ[node];
+    real localAzimuth = azimuth+blade*2*vf::lbm::constant::cPi/nBlades;
+
+    rotateFromBladeToGlobal(bladePosX_BF, bladePosY_BF, bladePosZ_BF, 
+                            bladePosX_GF, bladePosY_GF, bladePosZ_GF,
+                            localAzimuth, yaw);
+
+    bladePosX_GF += turbPosX;
+    bladePosY_GF += turbPosY;
+    bladePosZ_GF += turbPosZ;
 
     uint old_index = bladeIndices[node];
 
@@ -53,7 +103,7 @@ __global__ void interpolateVelocities(real* gridCoordsX, real* gridCoordsY, real
 
     k = findNearestCellBSW(old_index, 
                            gridCoordsX, gridCoordsY, gridCoordsZ, 
-                           bladePosX, bladePosY, bladePosZ, 
+                           bladePosX_GF, bladePosY_GF, bladePosZ_GF, 
                            neighborsX, neighborsY, neighborsZ, neighborsWSB);
         
     bladeIndices[node] = k;
@@ -63,29 +113,36 @@ __global__ void interpolateVelocities(real* gridCoordsX, real* gridCoordsY, real
     real dW, dE, dN, dS, dT, dB;
 
     real invDeltaX = 1.f/(gridCoordsX[ktne]-gridCoordsX[k]);
-    real distX = invDeltaX*(gridCoordsX[ktne]-bladePosX);
-    real distY = invDeltaX*(gridCoordsY[ktne]-bladePosY);
-    real distZ = invDeltaX*(gridCoordsZ[ktne]-bladePosZ);
+    real distX = invDeltaX*(bladePosX_GF-gridCoordsX[k]);
+    real distY = invDeltaX*(bladePosY_GF-gridCoordsY[k]);
+    real distZ = invDeltaX*(bladePosZ_GF-gridCoordsZ[k]);
+
+    getInterpolationWeights(dW, dE, dN, dS, dT, dB, distX, distY, distZ);
 
-    getInterpolationWeights(dW, dE, dN, dS, dT, dB, 
-                            distX, distY, distZ);
+    real bladeVelX_GF = trilinearInterpolation(dW, dE, dN, dS, dT, dB, k, ke, kn, kt, kne, kte, ktn, ktne, vx)*velocityRatio;
+    real bladeVelY_GF = trilinearInterpolation(dW, dE, dN, dS, dT, dB, k, ke, kn, kt, kne, kte, ktn, ktne, vy)*velocityRatio;
+    real bladeVelZ_GF = trilinearInterpolation(dW, dE, dN, dS, dT, dB, k, ke, kn, kt, kne, kte, ktn, ktne, vz)*velocityRatio;
 
-    bladeVelocitiesX[node] = trilinearInterpolation(dW, dE, dN, dS, dT, dB, k, ke, kn, kt, kne, kte, ktn, ktne, vx);
-    bladeVelocitiesY[node] = trilinearInterpolation(dW, dE, dN, dS, dT, dB, k, ke, kn, kt, kne, kte, ktn, ktne, vy);
-    bladeVelocitiesZ[node] = trilinearInterpolation(dW, dE, dN, dS, dT, dB, k, ke, kn, kt, kne, kte, ktn, ktne, vz);
+    real bladeVelX_BF, bladeVelY_BF, bladeVelZ_BF;
 
+    rotateFromGlobalToBlade(bladeVelX_BF, bladeVelY_BF, bladeVelZ_BF, bladeVelX_GF, bladeVelY_GF, bladeVelZ_GF, localAzimuth, yaw);
+
+    bladeVelocitiesX[node] = bladeVelX_BF;
+    bladeVelocitiesY[node] = bladeVelY_BF+omega*bladePosZ_BF;
+    bladeVelocitiesZ[node] = bladeVelZ_BF;
 }
 
 
 __global__ void applyBodyForces(real* gridCoordsX, real* gridCoordsY, real* gridCoordsZ,
-                           real* gridForcesX, real* gridForcesY, real* gridForcesZ, 
-                           uint* gridIndices, uint numberOfIndices, 
-                           real* bladeCoordsX, real* bladeCoordsY, real* bladeCoordsZ, 
-                           real* bladeForcesX, real* bladeForcesY,real* bladeForcesZ,
-                           real* bladeRadii,
-                           real radius,
-                           uint nBlades, uint nBladeNodes,
-                           real epsilon, real delta_x)
+                                real* gridForcesX, real* gridForcesY, real* gridForcesZ, 
+                                real* bladeCoordsX, real* bladeCoordsY, real* bladeCoordsZ, 
+                                real* bladeForcesX, real* bladeForcesY,real* bladeForcesZ,
+                                uint nBlades, uint nBladeNodes,
+                                real azimuth, real yaw, real omega, 
+                                real turbPosX, real turbPosZ, real turbPosY,
+                                real* bladeRadii, real forceRatio,
+                                uint* gridIndices, uint numberOfIndices, 
+                                real radius, real epsilon, real delta_x)
 {
     const uint x = threadIdx.x; 
     const uint y = blockIdx.x;
@@ -104,45 +161,61 @@ __global__ void applyBodyForces(real* gridCoordsX, real* gridCoordsY, real* grid
     real posY = gridCoordsY[gridIndex];
     real posZ = gridCoordsZ[gridIndex];
 
-    real fXYZ_X = 0.0f;
-    real fXYZ_Y = 0.0f;
-    real fXYZ_Z = 0.0f;
+    real gridForceX = 0.0f;
+    real gridForceY = 0.0f;
+    real gridForceZ = 0.0f;
 
-    real eta = 0.0f;
+    real deltaXCubed = pow(delta_x,3);
 
-    real delta_x_cubed = pow(delta_x,3);
+    real dAzimuth = 2*vf::lbm::constant::cPi/nBlades;
 
     for( uint blade=0; blade<nBlades; blade++)
-    {    
+    { 
         real last_r = 0.0f;
         real r = 0.0f;
-
+        real localAzimuth = azimuth+blade*dAzimuth;
+        
         for( uint bladeNode=0; bladeNode<nBladeNodes; bladeNode++)
         {
-            int node = bladeNode+blade*nBladeNodes;
-            eta = calcGaussian3D(posX, posY, posZ, bladeCoordsX[node], bladeCoordsY[node], bladeCoordsZ[node], epsilon)*delta_x_cubed;
             r = bladeRadii[bladeNode];
 
-            fXYZ_X += bladeForcesX[node]*(r-last_r)*eta;
-            fXYZ_Y += bladeForcesY[node]*(r-last_r)*eta;
-            fXYZ_Z += bladeForcesZ[node]*(r-last_r)*eta;
+            uint node = calcNode(bladeNode, nBladeNodes, blade, nBlades);
 
-            last_r = r;
-        }    
+            real bladePosX_GF, bladePosY_GF, bladePosZ_GF;
+
+            rotateFromBladeToGlobal(bladeCoordsX[node], bladeCoordsY[node], bladeCoordsZ[node], 
+                                    bladePosX_GF, bladePosY_GF, bladePosZ_GF,
+                                    localAzimuth, yaw);
+
+            bladePosX_GF += turbPosX;
+            bladePosY_GF += turbPosY;
+            bladePosZ_GF += turbPosZ;
 
-        fXYZ_X += bladeForcesX[nBladeNodes-1]*(radius-last_r)*eta;
-        fXYZ_Y += bladeForcesY[nBladeNodes-1]*(radius-last_r)*eta;
-        fXYZ_Z += bladeForcesZ[nBladeNodes-1]*(radius-last_r)*eta;
+            real eta = (r-last_r)*calcGaussian3D(posX, posY, posZ, bladePosX_GF, bladePosY_GF, bladePosZ_GF, epsilon)*deltaXCubed;
+
+            real forceX_GF, forceY_GF, forceZ_GF;
+
+            rotateFromBladeToGlobal(bladeForcesX[node], bladeForcesY[node], bladeForcesZ[node], forceX_GF, forceY_GF, forceZ_GF, localAzimuth, yaw);
+            
+            gridForceX += forceX_GF*eta;
+            gridForceY += forceY_GF*eta;
+            gridForceZ += forceZ_GF*eta;
+
+            last_r = r;
+        }         
     }
 
-    gridForcesX[gridIndex] = fXYZ_X;
-    gridForcesY[gridIndex] = fXYZ_Y;
-    gridForcesZ[gridIndex] = fXYZ_Z;
+    real invForceRatio = 1.f/forceRatio;
+
+    gridForcesX[gridIndex] = gridForceX*invForceRatio;
+    gridForcesY[gridIndex] = gridForceY*invForceRatio;
+    gridForcesZ[gridIndex] = gridForceZ*invForceRatio;
 }
 
 
 void ActuatorLine::init(Parameter* para, GridProvider* gridProvider, CudaMemoryManager* cudaManager)
 {
+    if(!para->getIsBodyForce()) throw std::runtime_error("try to allocate ActuatorLine but BodyForce is not set in Parameter.");
     this->initBladeRadii(cudaManager);
     this->initBladeCoords(cudaManager);    
     this->initBladeIndices(para, cudaManager);
@@ -155,7 +228,7 @@ void ActuatorLine::init(Parameter* para, GridProvider* gridProvider, CudaMemoryM
 void ActuatorLine::interact(Parameter* para, CudaMemoryManager* cudaManager, int level, unsigned int t)
 {
     if (level != this->level) return;
-    
+
     cudaManager->cudaCopyBladeCoordsHtoD(this);
 
     uint numberOfThreads = para->getParH(level)->numberofthreads;
@@ -165,17 +238,16 @@ void ActuatorLine::interact(Parameter* para, CudaMemoryManager* cudaManager, int
         para->getParD(this->level)->coordX_SP, para->getParD(this->level)->coordY_SP, para->getParD(this->level)->coordZ_SP,        
         para->getParD(this->level)->neighborX_SP, para->getParD(this->level)->neighborY_SP, para->getParD(this->level)->neighborZ_SP, para->getParD(this->level)->neighborWSB_SP,
         para->getParD(this->level)->vx_SP, para->getParD(this->level)->vy_SP, para->getParD(this->level)->vz_SP,
-        this->numberOfIndices,
         this->bladeCoordsXD, this->bladeCoordsYD, this->bladeCoordsZD,  
         this->bladeVelocitiesXD, this->bladeVelocitiesYD, this->bladeVelocitiesZD,  
-        this->bladeIndicesD, this->numberOfNodes);
+        this->nBlades, this->nBladeNodes,
+        this->azimuth, this->yaw, this->omega, 
+        this->turbinePosX, this->turbinePosZ, this->turbinePosY,
+        this->bladeIndicesD, para->getVelocityRatio());
 
     cudaManager->cudaCopyBladeVelocitiesDtoH(this);
 
-    if(true)
-    {
-        this->calcForcesEllipticWing(para);
-    }
+    this->calcBladeForces();
 
     cudaManager->cudaCopyBladeForcesHtoD(this);
 
@@ -184,18 +256,16 @@ void ActuatorLine::interact(Parameter* para, CudaMemoryManager* cudaManager, int
     applyBodyForces<<<sphereGrid.grid, sphereGrid.threads>>>(
         para->getParD(this->level)->coordX_SP, para->getParD(this->level)->coordY_SP, para->getParD(this->level)->coordZ_SP,        
         para->getParD(this->level)->forceX_SP, para->getParD(this->level)->forceY_SP, para->getParD(this->level)->forceZ_SP,        
-        this->boundingSphereIndicesD, this->numberOfIndices,
         this->bladeCoordsXD, this->bladeCoordsYD, this->bladeCoordsZD,  
         this->bladeForcesXD, this->bladeForcesYD, this->bladeForcesZD,
-        this->bladeRadiiD,
-        this->diameter*0.5f,  
         this->nBlades, this->nBladeNodes,
-        this->epsilon, this->delta_x);
-
-    real dazimuth = this->omega*this->delta_t;
+        this->azimuth, this->yaw, this->omega, 
+        this->turbinePosX, this->turbinePosZ, this->turbinePosY,
+        this->bladeRadiiD, this->density*pow(para->getViscosityRatio(),2),
+        this->boundingSphereIndicesD, this->numberOfIndices,
+        this->diameter*0.5f, this->epsilon, this->delta_x);
 
-    this->azimuth += dazimuth;
-    this->rotateBlades(dazimuth);
+    this->azimuth = fmod(this->azimuth+this->omega*this->delta_t,2*vf::lbm::constant::cPi);
 }
 
 
@@ -210,15 +280,9 @@ void ActuatorLine::free(Parameter* para, CudaMemoryManager* cudaManager)
 }
 
 
-void ActuatorLine::calcForcesEllipticWing(Parameter* para)
+void ActuatorLine::calcForcesEllipticWing()
 {
-    real localAzimuth;
     uint node;
-    real uXYZ_X, uXYZ_Y, uXYZ_Z;
-    real uRTZ_X, uRTZ_Y, uRTZ_Z;
-    real fXYZ_X, fXYZ_Y, fXYZ_Z;
-    real fRTZ_X, fRTZ_Y, fRTZ_Z;
-    real r;
     real u_rel, v_rel, u_rel_sq;
     real phi;
     real Cl = 1.f;
@@ -227,65 +291,39 @@ void ActuatorLine::calcForcesEllipticWing(Parameter* para)
 
     real c, Cn, Ct;
 
-    real forceRatio = this->density*pow(this->delta_x,4)*pow(this->delta_t,-2);
-
     for( uint blade=0; blade<this->nBlades; blade++)
-    {
-        localAzimuth = this->azimuth+2*blade*vf::lbm::constant::cPi/this->nBlades;
+    { 
         for( uint bladeNode=0; bladeNode<this->nBladeNodes; bladeNode++)
-        {
-            node = bladeNode+blade*this->nBladeNodes;
-            uXYZ_X = this->bladeVelocitiesXH[node]*para->getVelocityRatio();
-            uXYZ_Y = this->bladeVelocitiesYH[node]*para->getVelocityRatio();
-            uXYZ_Z = this->bladeVelocitiesZH[node]*para->getVelocityRatio();
-
-            invRotateAboutX3D(localAzimuth, uXYZ_X, uXYZ_Y, uXYZ_Z, uRTZ_X, uRTZ_Y, uRTZ_Z);
-            r = this->bladeRadiiH[bladeNode];
+        {        
+            node = calcNode(bladeNode, this->nBladeNodes, blade, this->nBlades);
 
-            u_rel = uRTZ_X;
-            v_rel = uRTZ_Y+this->omega*r;
+            u_rel = this->bladeVelocitiesXH[node];
+            v_rel = this->bladeVelocitiesYH[node];
             u_rel_sq = u_rel*u_rel+v_rel*v_rel;
             phi = atan2(u_rel, v_rel);
 
-            c = c0 * sqrt( 1.f- pow(4.f*r/this->diameter-1.f, 2.f) );
+            c = c0 * sqrt( 1.f- pow(4.f*this->bladeRadiiH[bladeNode]/this->diameter-1.f, 2.f) );
             Cn =   Cl*cos(phi)+Cd*sin(phi);
             Ct =  -Cl*sin(phi)+Cd*cos(phi);
-
-            fRTZ_X = 0.5f*u_rel_sq*c*this->density*Cn;
-            fRTZ_Y = 0.5f*u_rel_sq*c*this->density*Ct;
-            fRTZ_Z = 0.0;
-
-            rotateAboutX3D(localAzimuth, fRTZ_X, fRTZ_Y, fRTZ_Z, fXYZ_X, fXYZ_Y, fXYZ_Z);
         
-            this->bladeForcesXH[node] = fXYZ_X/forceRatio;
-            this->bladeForcesYH[node] = fXYZ_Y/forceRatio;
-            this->bladeForcesZH[node] = fXYZ_Z/forceRatio;
+            this->bladeForcesXH[node] = -0.5f*u_rel_sq*c*this->density*Cn;
+            this->bladeForcesYH[node] = -0.5f*u_rel_sq*c*this->density*Ct;
+            this->bladeForcesZH[node] = 0.0f;
         }
     }
 }
 
-void ActuatorLine::rotateBlades(real angle)
+void ActuatorLine::calcBladeForces()
 {
-    for(uint node=0; node<this->nBladeNodes*this->nBlades; node++)
-    {
-        real oldCoordX = this->bladeCoordsXH[node];
-        real oldCoordY = this->bladeCoordsYH[node];
-        real oldCoordZ = this->bladeCoordsZH[node];
-
-        real newCoordX, newCoordY, newCoordZ;
-        rotateAboutX3D(angle, oldCoordX, oldCoordY, oldCoordZ, newCoordX, newCoordY, newCoordZ, this->turbinePosX, this->turbinePosY, this->turbinePosZ);
-        
-        this->bladeCoordsYH[node] = newCoordX;
-        this->bladeCoordsYH[node] = newCoordY;
-        this->bladeCoordsZH[node] = newCoordZ;
-    }
+    this->calcForcesEllipticWing();
 }
 
 void ActuatorLine::initBladeRadii(CudaMemoryManager* cudaManager)
 {   
     cudaManager->cudaAllocBladeRadii(this);
 
-    real dx = 0.5f*this->diameter/this->nBladeNodes;        
+    real dx = 0.5f*this->diameter/this->nBladeNodes;  
+
     for(uint node=0; node<this->nBladeNodes; node++)
     {
         this->bladeRadiiH[node] = dx*(node+1);
@@ -297,20 +335,15 @@ void ActuatorLine::initBladeCoords(CudaMemoryManager* cudaManager)
 {   
     cudaManager->cudaAllocBladeCoords(this);
 
-    for( uint blade=0; blade<this->nBlades; blade++)
+    for(uint blade=0; blade<this->nBlades; blade++)
     {
-        real localAzimuth = this->azimuth+(2*vf::lbm::constant::cPi/this->nBlades)*blade;
-        for(uint node=0; node<this->nBladeNodes; node++)
+        for(uint bladeNode=0; bladeNode<this->nBladeNodes; bladeNode++)
         {
-            real coordX, coordY, coordZ;
-            real x,y,z;
-            x = 0.f;
-            y = 0.f;
-            z = this->bladeRadiiH[node];
-            rotateAboutX3D(localAzimuth, x, y, z, coordX, coordY, coordZ);
-            this->bladeCoordsXH[node+this->nBladeNodes*blade] = coordX+this->turbinePosX;
-            this->bladeCoordsYH[node+this->nBladeNodes*blade] = coordY+this->turbinePosY;
-            this->bladeCoordsZH[node+this->nBladeNodes*blade] = coordZ+this->turbinePosZ;
+            uint node = calcNode(bladeNode, this->nBladeNodes, blade, this->nBlades);
+
+            this->bladeCoordsXH[node] = 0.f;
+            this->bladeCoordsYH[node] = 0.f;
+            this->bladeCoordsZH[node] = this->bladeRadiiH[bladeNode];
         }
     }
     cudaManager->cudaCopyBladeCoordsHtoD(this);
@@ -346,17 +379,9 @@ void ActuatorLine::initBladeIndices(Parameter* para, CudaMemoryManager* cudaMana
 {   
     cudaManager->cudaAllocBladeIndices(this);
 
-    real* coordsX = para->getParH(this->level)->coordX_SP;
-    real* coordsY = para->getParH(this->level)->coordY_SP;
-    real* coordsZ = para->getParH(this->level)->coordZ_SP;
-
     for(uint node=0; node<this->numberOfNodes; node++)
     {
-        this->bladeIndicesH[node] = findNearestCellBSW(1, coordsX, coordsY, coordsZ, 
-                                                       this->bladeCoordsXH[node], this->bladeCoordsYH[node], this->bladeCoordsZH[node],
-                                                       para->getParH(this->level)->neighborX_SP, para->getParH(this->level)->neighborY_SP, para->getParH(this->level)->neighborZ_SP,
-                                                       para->getParH(this->level)->neighborWSB_SP);
-        
+        this->bladeIndicesH[node] = 1;
     }
     cudaManager->cudaCopyBladeIndicesHtoD(this);
 }
@@ -365,19 +390,50 @@ void ActuatorLine::initBoundingSphere(Parameter* para, CudaMemoryManager* cudaMa
 {
     // Actuator line exists only on 1 level
     std::vector<int> nodesInSphere;
+    real sphereRadius = 0.5*this->diameter+4.f*this->epsilon;
 
     for (uint j = 1; j <= para->getParH(this->level)->size_Mat_SP; j++)
     {
-        const real coordX = para->getParH(this->level)->coordX_SP[j];
-        const real coordY = para->getParH(this->level)->coordY_SP[j];
-        const real coordZ = para->getParH(this->level)->coordZ_SP[j];
-        const real dist = sqrt(pow(coordX-this->turbinePosX,2)+pow(coordY-this->turbinePosY,2)+pow(coordZ-this->turbinePosZ,2));
-        
-        if(dist < 0.6*this->diameter) nodesInSphere.push_back(j);
+        const real distX = para->getParH(this->level)->coordX_SP[j]-this->turbinePosX;
+        const real distY = para->getParH(this->level)->coordY_SP[j]-this->turbinePosY;
+        const real distZ = para->getParH(this->level)->coordZ_SP[j]-this->turbinePosZ;
+        const real dist = sqrt(pow(distX,2)+pow(distY,2)+pow(distZ,2));
+        if(dist < sphereRadius) nodesInSphere.push_back(j);
     }
 
     this->numberOfIndices = uint(nodesInSphere.size());
     cudaManager->cudaAllocSphereIndices(this);
     std::copy(nodesInSphere.begin(), nodesInSphere.end(), this->boundingSphereIndicesH);
     cudaManager->cudaCopySphereIndicesHtoD(this);
+}
+
+void ActuatorLine::setBladeCoords(real* _bladeCoordsX, real* _bladeCoordsY, real* _bladeCoordsZ)
+{ 
+
+    for(uint node=0; node<this->numberOfNodes; node++)
+    {
+        this->bladeCoordsXH[node] = _bladeCoordsX[node];
+        this->bladeCoordsYH[node] = _bladeCoordsY[node];
+        this->bladeCoordsZH[node] = _bladeCoordsZ[node];
+    }
+}
+
+void ActuatorLine::setBladeVelocities(real* _bladeVelocitiesX, real* _bladeVelocitiesY, real* _bladeVelocitiesZ)
+{ 
+    for(uint node=0; node<this->numberOfNodes; node++)
+    {
+        this->bladeVelocitiesXH[node] = _bladeVelocitiesX[node];
+        this->bladeVelocitiesYH[node] = _bladeVelocitiesY[node];
+        this->bladeVelocitiesZH[node] = _bladeVelocitiesZ[node];
+    }
+}
+
+void ActuatorLine::setBladeForces(real* _bladeForcesX, real* _bladeForcesY, real* _bladeForcesZ)
+{ 
+    for(uint node=0; node<this->numberOfNodes; node++)
+    {
+        this->bladeForcesXH[node] = _bladeForcesX[node];
+        this->bladeForcesYH[node] = _bladeForcesY[node];
+        this->bladeForcesZH[node] = _bladeForcesZ[node];
+    }
 }
\ No newline at end of file
diff --git a/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/ActuatorLine.h b/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/ActuatorLine.h
index f0de6c8e01b2f4cfc0421f02b1a75308512e34e8..6a61cb7bdb1f948cf0a9404129af2f3afb3842e3 100644
--- a/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/ActuatorLine.h
+++ b/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/ActuatorLine.h
@@ -3,11 +3,12 @@
 
 #include "PreCollisionInteractor.h"
 #include "PointerDefinitions.h"
+#include "VirtualFluids_GPU_export.h"
 
 class Parameter;
 class GridProvider;
 
-class ActuatorLine : public PreCollisionInteractor
+class VIRTUALFLUIDS_GPU_EXPORT ActuatorLine : public PreCollisionInteractor
 {
 public:
     ActuatorLine(
@@ -35,23 +36,27 @@ public:
         this->numberOfNodes = this->nBladeNodes*this->nBlades;
         this->omega = 1.0f;
         this->azimuth = 0.0f;
+        this->yaw = 0.0f;
+    };
 
-    }
-
-    virtual  ~ActuatorLine()
-    {
-        
-    }
+    virtual ~ActuatorLine(){};
 
-    void init(Parameter* para, GridProvider* gridProvider, CudaMemoryManager* cudaManager);
-    void interact(Parameter* para, CudaMemoryManager* cudaManager, int level, uint t);
-    void free(Parameter* para, CudaMemoryManager* cudaManager);
+    void init(Parameter* para, GridProvider* gridProvider, CudaMemoryManager* cudaManager) override;
+    void interact(Parameter* para, CudaMemoryManager* cudaManager, int level, uint t) override;
+    void free(Parameter* para, CudaMemoryManager* cudaManager) override;
     void write(uint t);
 
     uint getNBladeNodes(){return this->nBladeNodes;};
     uint getNBlades(){return this->nBlades;};
     uint getNumberOfIndices(){return this->numberOfIndices;};
     uint getNumberOfNodes(){return this->numberOfNodes;};
+    real getOmega(){ return this->omega; };
+    real getAzimuth(){ return this->azimuth; };
+    real getDensity(){ return this->density; };
+    real getPositionX(){ return this->turbinePosX; };
+    real getPositionY(){ return this->turbinePosY; };
+    real getPositionZ(){ return this->turbinePosZ; };
+    real* getBladeRadii(){return this->bladeRadiiH;};
     real* getBladeCoordsX(){return this->bladeCoordsXH;};
     real* getBladeCoordsY(){return this->bladeCoordsYH;};
     real* getBladeCoordsZ(){return this->bladeCoordsZH;};
@@ -62,6 +67,14 @@ public:
     real* getBladeForcesY(){return this->bladeForcesYH;};
     real* getBladeForcesZ(){return this->bladeForcesZH;};
 
+    void setOmega(real _omega){ this->omega = _omega; };
+    void setAzimuth(real _azimuth){ this->azimuth = _azimuth; };
+    void setYaw(real _yaw){ this->yaw = _yaw; };
+    void setBladeCoords(real* _bladeCoordsX, real* _bladeCoordsY, real* _bladeCoordsZ);
+    void setBladeVelocities(real* _bladeVelocitiesX, real* _bladeVelocitiesY, real* _bladeVelocitiesZ);
+    void setBladeForces(real* _bladeForcesX, real* _bladeForcesY, real* _bladeForcesZ);
+    virtual void calcBladeForces();
+
 private:
     void initBoundingSphere(Parameter* para, CudaMemoryManager* cudaManager);
 
@@ -71,12 +84,12 @@ private:
     void initBladeForces(CudaMemoryManager* cudaManager);
     void initBladeIndices(Parameter* para, CudaMemoryManager* cudaManager);
 
-    void calcForcesEllipticWing(Parameter* para);
+    void calcForcesEllipticWing();
     void rotateBlades(real angle);
 
-    void writeBladeCoords(uint t);
-    void writeBladeForces(uint t);
-    void writeBladeVelocities(uint t);
+    void writeBladeCoords(uint t){};
+    void writeBladeForces(uint t){};
+    void writeBladeVelocities(uint t){};
     
 
 public:
@@ -96,7 +109,7 @@ public:
 private:
     const real density;
     real turbinePosX, turbinePosY, turbinePosZ;
-    real omega, azimuth, delta_t, delta_x;
+    real omega, azimuth, yaw, delta_t, delta_x;
     const real diameter;
     const uint nBladeNodes;
     const uint nBlades;
@@ -104,6 +117,6 @@ private:
     const int level;
     uint numberOfIndices;
     uint numberOfNodes;
-    };
+};
 
 #endif
\ No newline at end of file
diff --git a/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/PlaneProbe.h b/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/PlaneProbe.h
index 6f6af68fc2e4b5d93502d9890a29cc4838ac6042..e1ee591b4bfe0bc03771e023987241e7458265b7 100644
--- a/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/PlaneProbe.h
+++ b/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/PlaneProbe.h
@@ -8,10 +8,12 @@ class PlaneProbe : public Probe
 public: 
     PlaneProbe(
         const std::string _probeName,
+        const std::string _outputPath,
         uint _tStartAvg,
         uint _tStartOut,
         uint _tOut
-    ): Probe(_probeName, 
+    ): Probe(_probeName,
+             _outputPath, 
              _tStartAvg, 
              _tStartOut, 
              _tOut)
diff --git a/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/PointProbe.h b/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/PointProbe.h
index 917d2cf8c4f4606fc297491782bbc92dc621cf0e..1be97f7f4c3ce25b01cb03ebaae7c2795bc60cc6 100644
--- a/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/PointProbe.h
+++ b/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/PointProbe.h
@@ -8,10 +8,12 @@ class PointProbe: public Probe
 public:
     PointProbe(
         const std::string _probeName,
+        const std::string _outputPath,
         uint _tStartAvg,
         uint _tStartOut,
         uint _tOut
-    ): Probe(_probeName, 
+    ): Probe(_probeName,
+             _outputPath, 
              _tStartAvg, 
              _tStartOut, 
              _tOut)
diff --git a/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/Probe.cu b/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/Probe.cu
index e798f433270777f8d718218e91306f980b20e9ca..1875ef83b9bd388f16cf7d63fe4c3af2968a9113 100644
--- a/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/Probe.cu
+++ b/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/Probe.cu
@@ -18,6 +18,12 @@ std::vector<std::string> getPostProcessingVariableNames(PostProcessingVariable v
     std::vector<std::string> varNames;
     switch (variable)
     {
+    case PostProcessingVariable::Instantaneous:
+        varNames.push_back("vx");
+        varNames.push_back("vy");
+        varNames.push_back("vz");
+        varNames.push_back("rho");
+        break;
     case PostProcessingVariable::Means:
         varNames.push_back("vx_mean");
         varNames.push_back("vy_mean");
@@ -42,6 +48,15 @@ __device__ void calculateQuantities(uint n, real* quantityArray, bool* quantitie
     // also has extensions for higher order and covariances
     real inv_n = 1/real(n);
 
+    if(quantities[int(PostProcessingVariable::Instantaneous)])
+    {
+        uint arrOff = quantityArrayOffsets[int(PostProcessingVariable::Instantaneous)];
+        quantityArray[(arrOff+0)*nPoints+node] = vx;
+        quantityArray[(arrOff+1)*nPoints+node] = vy;
+        quantityArray[(arrOff+2)*nPoints+node] = vz;
+        quantityArray[(arrOff+3)*nPoints+node] = rho;
+    }
+
     if(quantities[int(PostProcessingVariable::Means)])
     {
         
@@ -136,7 +151,6 @@ __global__ void interpQuantities(   uint* pointIndices,
 
 }
 
-
 void Probe::init(Parameter* para, GridProvider* gridProvider, CudaMemoryManager* cudaManager)
 {
 
@@ -158,13 +172,11 @@ void Probe::init(Parameter* para, GridProvider* gridProvider, CudaMemoryManager*
         
         this->addProbeStruct(cudaManager, probeIndices_level, 
                             distX_level, distY_level, distZ_level, 
-                            pointCoordsX_level, pointCoordsX_level, pointCoordsX_level, 
+                            pointCoordsX_level, pointCoordsY_level, pointCoordsZ_level, 
                             level);
     }
 }
 
-
-
 void Probe::addProbeStruct(CudaMemoryManager* cudaManager, std::vector<int>& probeIndices,
                                       std::vector<real>& distX, std::vector<real>& distY, std::vector<real>& distZ,   
                                       std::vector<real>& pointCoordsX, std::vector<real>& pointCoordsY, std::vector<real>& pointCoordsZ,
@@ -223,7 +235,6 @@ void Probe::addProbeStruct(CudaMemoryManager* cudaManager, std::vector<int>& pro
     cudaManager->cudaCopyProbeQuantityArrayHtoD(this, level);
 }
 
-
 void Probe::interact(Parameter* para, CudaMemoryManager* cudaManager, int level, uint t)
 {
 
@@ -255,8 +266,6 @@ void Probe::free(Parameter* para, CudaMemoryManager* cudaManager)
     }
 }
 
-
-
 void Probe::addPostProcessingVariable(PostProcessingVariable variable)
 {
     this->quantities[int(variable)] = true;
@@ -296,7 +305,7 @@ void Probe::writeCollectionFile(Parameter* para, int t)
 
     std::ofstream file;
 
-    file.open( filename + ".pvtu" );
+    file.open(this->outputPath + "/" + filename + ".pvtu" );
 
     //////////////////////////////////////////////////////////////////////////
     
@@ -370,6 +379,9 @@ void Probe::writeGridFiles(Parameter* para, int level, std::vector<std::string>&
 
             switch(quantity)
             {
+            case PostProcessingVariable::Instantaneous:
+                coeff = para->getVelocityRatio();
+            break;
             case PostProcessingVariable::Means:
                 coeff = para->getVelocityRatio();
             break;
@@ -390,7 +402,7 @@ void Probe::writeGridFiles(Parameter* para, int level, std::vector<std::string>&
                 }
             }
         }}
-        WbWriterVtkXmlBinary::getInstance()->writeNodesWithNodeData(fnames[part], nodes, nodedatanames, nodedata);
+        WbWriterVtkXmlBinary::getInstance()->writeNodesWithNodeData(this->outputPath + "/" + fnames[part], nodes, nodedatanames, nodedata);
     }
 }
 
@@ -405,4 +417,3 @@ std::vector<std::string> Probe::getVarNames()
     }}
     return varNames;
 }
-
diff --git a/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/Probe.h b/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/Probe.h
index 8223411ffc45fdac8b3ba038f97eeb899dd3d94d..988d1817e3b19bbfa5c25ed8d271a105ff433de9 100644
--- a/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/Probe.h
+++ b/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/Probe.h
@@ -12,6 +12,7 @@ enum class PostProcessingVariable{
     // In writeGridFiles add lb->rw conversion factor
     // In getPostProcessingVariableNames add names
     // If new quantity depends on other quantities i.e. mean, catch in addPostProcessingVariable
+    Instantaneous,
     Means,
     Variances,
     LAST,
@@ -43,10 +44,12 @@ class Probe : public PreCollisionInteractor
 public:
     Probe(
         const std::string _probeName,
+        const std::string _outputPath,
         uint _tStartAvg,
         uint _tStartOut,
         uint _tOut
     ):  probeName(_probeName),
+        outputPath(_outputPath),
         tStartAvg(_tStartAvg),
         tStartOut(_tStartOut),
         tOut(_tOut),
@@ -54,9 +57,10 @@ public:
     {
         assert("Output starts before averaging!" && tStartOut>=tStartAvg);
     }
-    void init(Parameter* para, GridProvider* gridProvider, CudaMemoryManager* cudaManager);
-    void interact(Parameter* para, CudaMemoryManager* cudaManager, int level, uint t);
-    void free(Parameter* para, CudaMemoryManager* cudaManager);
+    
+    void init(Parameter* para, GridProvider* gridProvider, CudaMemoryManager* cudaManager) override;
+    void interact(Parameter* para, CudaMemoryManager* cudaManager, int level, uint t) override;
+    void free(Parameter* para, CudaMemoryManager* cudaManager) override;
 
     SPtr<ProbeStruct> getProbeStruct(int level){ return this->probeParams[level]; }
 
@@ -80,9 +84,10 @@ private:
     
 private:
     const std::string probeName;
+    const std::string outputPath;
 
     std::vector<SPtr<ProbeStruct>> probeParams;
-    bool quantities[int(PostProcessingVariable::LAST)];
+    bool quantities[int(PostProcessingVariable::LAST)] = {};
     std::vector<std::string> fileNamesForCollectionFile;
     std::vector<std::string> varNames;
 
diff --git a/src/lbm/cuda/CMakeLists.txt b/src/lbm/cuda/CMakeLists.txt
index 3a88d91648b960915279fc4f133f6b60047149bf..4142b7c3b1c46275c3257e3dfd657cc6b30c841d 100644
--- a/src/lbm/cuda/CMakeLists.txt
+++ b/src/lbm/cuda/CMakeLists.txt
@@ -4,7 +4,7 @@ project(lbmCuda LANGUAGES CUDA CXX)
 vf_add_library(NAME lbmCuda BUILDTYPE static PUBLIC_LINK basics FOLDER ../../lbm)
 
 
-set_target_properties(lbmCuda PROPERTIES CUDA_SEPARABLE_COMPILATION ON)
+set_target_properties(lbmCuda PROPERTIES CUDA_SEPARABLE_COMPILATION ON POSITION_INDEPENDENT_CODE ON)
 
 
 set_source_files_properties(../KernelParameter.cpp PROPERTIES LANGUAGE CUDA)