diff --git a/Python/actuator_line/actuator_line.py b/Python/actuator_line/actuator_line.py
index cd0e06149a52430cbdc811106b505bc6049e2221..6e3c8608617df1267535984d53307dea9184c6ab 100644
--- a/Python/actuator_line/actuator_line.py
+++ b/Python/actuator_line/actuator_line.py
@@ -16,8 +16,8 @@ sim_name = "ActuatorLine"
 config_file = Path(__file__).parent/Path("config.txt")
 output_path = Path(__file__).parent/Path("output")
 output_path.mkdir(exist_ok=True)
-timeStepOut = 500
-t_end = 50
+t_out = 100.
+t_end = 500.
 
 #%%
 logger.Logger.initialize_logger()
@@ -25,15 +25,18 @@ basics.logger.Logger.add_stdout()
 basics.logger.Logger.set_debug_level(basics.logger.Level.INFO_LOW)
 basics.logger.Logger.time_stamp(basics.logger.TimeStamp.ENABLE)
 basics.logger.Logger.enable_printed_rank_numbers(True)
+# %%
+comm = gpu.Communicator.get_instance()
+#%%
+grid_factory = gpu.grid_generator.GridFactory.make()
+grid_builder = gpu.grid_generator.MultipleGridBuilder.make_shared(grid_factory)
+
 #%%
-grid_builder = gpu.MultipleGridBuilder.make_shared()
 dx = reference_diameter/nodes_per_diameter
 
 grid_builder.add_coarse_grid(0.0, 0.0, 0.0, *length, dx)
 grid_builder.set_periodic_boundary_condition(False, False, False)
 grid_builder.build_grids(basics.LbmOrGks.LBM, False)
-# %%
-comm = gpu.Communicator.get_instance()
 #%%
 config = basics.ConfigurationFile()
 config.load(str(config_file))
@@ -55,6 +58,7 @@ para.set_max_level(1)
 para.set_velocity(velocity_lb)
 para.set_viscosity(viscosity_lb)    
 para.set_velocity_ratio(dx/dt)
+para.set_viscosity_ratio(dx*dx/dt)
 para.set_main_kernel("TurbulentViscosityCumulantK17CompChim")
 para.set_use_AMD(True)
 para.set_SGS_constant(0.083)
@@ -63,7 +67,7 @@ def init_func(coord_x, coord_y, coord_z):
     return [0.0, velocity_lb, 0.0, 0.0]
 
 para.set_initial_condition(init_func)
-para.set_t_out(timeStepOut)
+para.set_t_out(int(t_out/dt))
 para.set_t_end(int(t_end/dt))
 para.set_is_body_force(True)
 
@@ -79,8 +83,8 @@ grid_builder.set_velocity_boundary_condition(gpu.SideType.PZ, velocity_lb, 0.0,
 grid_builder.set_pressure_boundary_condition(gpu.SideType.PX, 0.0)
 
 #%%
-cuda_memory_manager = gpu.CudaMemoryManager.make(para)
-grid_generator = gpu.GridProvider.make_grid_generator(grid_builder, para, cuda_memory_manager)
+cuda_memory_manager = gpu.CudaMemoryManager(para)
+grid_generator = gpu.GridProvider.make_grid_generator(grid_builder, para, cuda_memory_manager, comm)
 #%%
 turb_pos = np.array([3,3,3])*reference_diameter
 epsilon = 5
@@ -91,21 +95,17 @@ n_blade_nodes = 32
 alm = gpu.ActuatorLine(n_blades, density, n_blade_nodes, epsilon, *turb_pos, reference_diameter, level, dt, dx)
 para.add_actuator(alm)
 #%%
-point_probe = gpu.probes.PointProbe("pointProbe", str(output_path), 100, 500, 100)
+point_probe = gpu.probes.PointProbe("pointProbe", str(output_path), 100, 1, 500, 100)
 point_probe.add_probe_points_from_list(np.array([1,2,5])*reference_diameter, np.array([3,3,3])*reference_diameter, np.array([3,3,3])*reference_diameter)
-point_probe.add_post_processing_variable(gpu.probes.PostProcessingVariable.Means)
+point_probe.add_statistic(gpu.probes.Statistic.Means)
 
 para.add_probe(point_probe)
 
-plane_probe = gpu.probes.PlaneProbe("planeProbe", str(output_path), 100, 500, 100)
+plane_probe = gpu.probes.PlaneProbe("planeProbe", str(output_path), 100, 1, 500, 100)
 plane_probe.set_probe_plane(5*reference_diameter, 0, 0, dx, length[1], length[2])
 para.add_probe(plane_probe)
 #%%
-sim = gpu.Simulation(comm)
-kernel_factory = gpu.KernelFactory.get_instance()
-sim.set_factories(kernel_factory, gpu.PreProcessorFactory.get_instance())
-sim.init(para, grid_generator, gpu.FileWriter(), cuda_memory_manager)
+sim = gpu.Simulation(para, cuda_memory_manager, comm, grid_generator)
 #%%
 sim.run()
-sim.free()
 MPI.Finalize()
\ No newline at end of file
diff --git a/Python/boundary_layer/boundary_layer.py b/Python/boundary_layer/boundary_layer.py
index cf941a9418e5c3ec5d94864f119de20401601622..1c01f50946b49bc0ddab7e50065a24aab4ae869f 100644
--- a/Python/boundary_layer/boundary_layer.py
+++ b/Python/boundary_layer/boundary_layer.py
@@ -4,20 +4,34 @@ from pathlib import Path
 from mpi4py import MPI
 from pyfluids import basics, gpu, logger
 #%%
-reference_diameter = 126
+reference_height = 1000 # boundary layer height in m
 
-length = np.array([30,8,8])*reference_diameter
+length = np.array([6,4,1])*reference_height
 viscosity = 1.56e-5
-velocity = 9
 mach = 0.1
-nodes_per_diameter = 32
+nodes_per_height = 32
+
+z_0 = 0.1
+u_star = 0.4
+kappa = 0.4
+
+velocity = 0.5*u_star/kappa*np.log(length[2]/z_0+1)
+flow_through_time = length[0]/velocity
+use_AMD = True
+
 
 sim_name = "BoundaryLayer"
 config_file = Path(__file__).parent/Path("config.txt")
 output_path = Path(__file__).parent/Path("output")
 output_path.mkdir(exist_ok=True)
-timeStepOut = 500
-t_end = 50
+t_out = 1000.
+t_end = 5000.
+
+t_start_averaging = 0
+t_start_tmp_averaging =  100_000
+t_averaging = 200
+t_start_out_probe = 0
+t_out_probe = 1000
 
 #%%
 logger.Logger.initialize_logger()
@@ -25,24 +39,37 @@ basics.logger.Logger.add_stdout()
 basics.logger.Logger.set_debug_level(basics.logger.Level.INFO_LOW)
 basics.logger.Logger.time_stamp(basics.logger.TimeStamp.ENABLE)
 basics.logger.Logger.enable_printed_rank_numbers(True)
-#%%
-grid_builder = gpu.MultipleGridBuilder.make_shared()
-dx = reference_diameter/nodes_per_diameter
-
-grid_builder.add_coarse_grid(0.0, 0.0, 0.0, *length, dx)
-grid_builder.set_periodic_boundary_condition(False, False, False)
-grid_builder.build_grids(basics.LbmOrGks.LBM, False)
 # %%
 comm = gpu.Communicator.get_instance()
+#%%
+grid_factory = gpu.grid_generator.GridFactory.make()
+grid_builder = gpu.grid_generator.MultipleGridBuilder.make_shared(grid_factory)
+
+#%%
+dx = reference_height/nodes_per_height
+dt = dx * mach / (np.sqrt(3) * velocity)
+velocity_lb = velocity * dt / dx # LB units
+viscosity_lb = viscosity * dt / (dx * dx) # LB units
+
+pressure_gradient = u_star**2 / reference_height
+pressure_gradient_lb = pressure_gradient * dt**2 / dx
+
+logger.vf_log_info(f"velocity    = {velocity_lb:1.6} dx/dt")
+logger.vf_log_info(f"dt          = {dt:1.6}")
+logger.vf_log_info(f"dx          = {dx:1.6}")
+logger.vf_log_info(f"u*          = {u_star:1.6}")
+logger.vf_log_info(f"dpdx        = {pressure_gradient:1.6}")
+logger.vf_log_info(f"dpdx        = {pressure_gradient_lb:1.6} dx/dt^2")
+logger.vf_log_info(f"viscosity   = {viscosity_lb:1.6} dx^2/dt")
+
+
 #%%
 config = basics.ConfigurationFile()
 config.load(str(config_file))
 #%%
 para = gpu.Parameter(config, comm.get_number_of_process(), comm.get_pid())
 
-dt = dx * mach / (np.sqrt(3) * velocity)
-velocity_lb = velocity * dt / dx # LB units
-viscosity_lb = viscosity * dt / (dx * dx) # LB units
+
 
 #%%
 para.set_devices([0])
@@ -55,54 +82,52 @@ para.set_max_level(1)
 para.set_velocity(velocity_lb)
 para.set_viscosity(viscosity_lb)    
 para.set_velocity_ratio(dx/dt)
-para.set_main_kernel("CumulantK17CompChim")
+para.set_viscosity_ratio(dx*dx/dt)
+para.set_use_AMD(use_AMD)
+
+para.set_main_kernel("TurbulentViscosityCumulantK17CompChim" if para.get_use_AMD() else "CumulantK17CompChim")
+
+para.set_SGS_constant(0.083)
 
 def init_func(coord_x, coord_y, coord_z):
-    return [0.0, velocity_lb, 0.0, 0.0]
+    return [
+        0.0, 
+        (u_star/kappa*np.log(max(coord_z/z_0,0)+1) + 2*np.sin(np.pi*16*coord_x/length[0])*np.sin(np.pi*8*coord_z/length[2]))/((coord_z/reference_height)**2+0.1)*dt/dx, 
+        2*np.sin(np.pi*16*coord_x/length[0])*np.sin(np.pi*8*coord_z/length[2])/((coord_z/reference_height)**2+0.1)*dt/dx, 
+        8*u_star/kappa*(np.sin(np.pi*8*coord_y/reference_height)*np.sin(np.pi*8*coord_z/reference_height)+np.sin(np.pi*8*coord_x/length[0]))/((length[2]/2-coord_z)**2+0.1)*dt/dx
+        ]
 
 para.set_initial_condition(init_func)
-para.set_t_out(timeStepOut)
+para.set_t_out(int(t_out/dt))
 para.set_t_end(int(t_end/dt))
 para.set_is_body_force(True)
+para.set_has_wall_model_monitor(True)
 
-#%%
-grid_builder.set_velocity_boundary_condition(gpu.SideType.MX, velocity_lb, 0.0, 0.0)
-grid_builder.set_velocity_boundary_condition(gpu.SideType.PX, velocity_lb, 0.0, 0.0)
-
-grid_builder.set_velocity_boundary_condition(gpu.SideType.MY, velocity_lb, 0.0, 0.0)
-grid_builder.set_velocity_boundary_condition(gpu.SideType.PY, velocity_lb, 0.0, 0.0)
 
-grid_builder.set_velocity_boundary_condition(gpu.SideType.MZ, velocity_lb, 0.0, 0.0)
-grid_builder.set_velocity_boundary_condition(gpu.SideType.PZ, velocity_lb, 0.0, 0.0)
-
-#%%
-cuda_memory_manager = gpu.CudaMemoryManager.make(para)
-grid_generator = gpu.GridProvider.make_grid_generator(grid_builder, para, cuda_memory_manager)
+grid_builder.add_coarse_grid(0.0, 0.0, 0.0, *length, dx)
+grid_builder.set_periodic_boundary_condition(True, True, False)
+grid_builder.build_grids(basics.LbmOrGks.LBM, False)
 #%%
-turb_pos = np.array([3,3,3])*reference_diameter
-epsilon = 5
-density = 1.225
-level = 0
-n_blades = 3
-n_blade_nodes = 32
-alm = gpu.ActuatorLine(n_blades, density, n_blade_nodes, epsilon, *turb_pos, reference_diameter, level, dt, dx)
-para.add_actuator(alm)
+sampling_offset = 2
+grid_builder.set_stress_boundary_condition(gpu.SideType.MZ, 0.0, 0.0, 1.0, sampling_offset, z_0/dx)
+grid_builder.set_slip_boundary_condition(gpu.SideType.PZ, 0.0, 0.0, 0.0)
+
 #%%
-point_probe = gpu.probes.PointProbe("pointProbe", str(output_path), 100, 500, 100)
-point_probe.add_probe_points_from_list(np.array([1,2,5])*reference_diameter, np.array([3,3,3])*reference_diameter, np.array([3,3,3])*reference_diameter)
-point_probe.add_post_processing_variable(gpu.probes.PostProcessingVariable.Means)
+cuda_memory_manager = gpu.CudaMemoryManager(para)
+grid_generator = gpu.GridProvider.make_grid_generator(grid_builder, para, cuda_memory_manager, comm)
 
-para.add_probe(point_probe)
+#%%
+wall_probe = gpu.probes.WallModelProbe("wallModelProbe", str(output_path), int(t_start_averaging/dt), int(t_start_tmp_averaging/dt), int(t_averaging/dt/4), int(t_start_out_probe/dt), int(t_out_probe/dt))
+wall_probe.add_all_available_statistics()
+wall_probe.set_file_name_to_n_out()
+wall_probe.set_force_output_to_stress(True)
+if para.get_is_body_force():
+    wall_probe.set_evaluate_pressure_gradient(True)
+planar_probe = gpu.probes.PlanarAverageProbe("planarAverageProbe", str(output_path), int(t_start_averaging/dt), int(t_start_tmp_averaging/dt), int(t_averaging/dt), int(t_start_out_probe/dt), int(t_out_probe/dt), "z")  # NOTE(review): planar_probe is never passed to para.add_probe() in this script, so it appears unused - confirm whether it should be registered (and given statistics)
+para.add_probe(wall_probe)
 
-plane_probe = gpu.probes.PlaneProbe("planeProbe", str(output_path), 100, 500, 100)
-plane_probe.set_probe_plane(5*reference_diameter, 0, 0, dx, length[1], length[2])
-para.add_probe(plane_probe)
 #%%
-sim = gpu.Simulation(comm)
-kernel_factory = gpu.KernelFactory.get_instance()
-sim.set_factories(kernel_factory, gpu.PreProcessorFactory.get_instance())
-sim.init(para, grid_generator, gpu.FileWriter(), cuda_memory_manager)
+sim = gpu.Simulation(para, cuda_memory_manager, comm, grid_generator)
 #%%
 sim.run()
-sim.free()
 MPI.Finalize()
\ No newline at end of file
diff --git a/apps/gpu/LBM/ActuatorLine/ActuatorLine.cpp b/apps/gpu/LBM/ActuatorLine/ActuatorLine.cpp
index 2f3c7337154703ae4e2ee108fe5a3ce0620191ff..51ebaf7e465d75f793ba04d3ecd6686aa94b8b8d 100644
--- a/apps/gpu/LBM/ActuatorLine/ActuatorLine.cpp
+++ b/apps/gpu/LBM/ActuatorLine/ActuatorLine.cpp
@@ -194,13 +194,14 @@ void multipleLevel(const std::string& configPath)
     std::vector<real> probeCoordsZ = {3*reference_diameter,3*reference_diameter,3*reference_diameter};
     pointProbe->addProbePointsFromList(probeCoordsX, probeCoordsY, probeCoordsZ);
     // pointProbe->addProbePointsFromXNormalPlane(2*D, 0.0, 0.0, L_y, L_z, (uint)L_y/dx, (uint)L_z/dx);
-    pointProbe->addPostProcessingVariable(PostProcessingVariable::Means);
-    pointProbe->addPostProcessingVariable(PostProcessingVariable::Variances);
+    
+    pointProbe->addStatistic(Statistic::Means);
+    pointProbe->addStatistic(Statistic::Variances);
     para->addProbe( pointProbe );
 
-    SPtr<PlaneProbe> planeProbe = SPtr<PlaneProbe>( new PlaneProbe("planeProbe", para->getOutputPath(), 100, 500, 100) );
+    SPtr<PlaneProbe> planeProbe = SPtr<PlaneProbe>( new PlaneProbe("planeProbe", para->getOutputPath(), 100, 500, 100, 100) );
     planeProbe->setProbePlane(5*reference_diameter, 0, 0, dx, L_y, L_z);
-    planeProbe->addPostProcessingVariable(PostProcessingVariable::Means);
+    planeProbe->addStatistic(Statistic::Means);
     para->addProbe( planeProbe );
 
 
diff --git a/pythonbindings/src/gpu/gpu.cpp b/pythonbindings/src/gpu/gpu.cpp
index 1dd960dbb3ff9c9ef21cecb36e5df90e74360726..dc110cd5e19a9aad4937f9c2133ddf74c0ddf9bf 100644
--- a/pythonbindings/src/gpu/gpu.cpp
+++ b/pythonbindings/src/gpu/gpu.cpp
@@ -5,14 +5,10 @@
 #include "submodules/parameter.cpp"
 #include "submodules/boundary_conditions.cpp"
 #include "submodules/communicator.cpp"
-#include "submodules/grid_builder.cpp"
 #include "submodules/cuda_memory_manager.cpp"
 #include "submodules/grid_provider.cpp"
-#include "submodules/probes.cpp"
-#include "submodules/kernel_factory.cpp"
-#include "submodules/pre_processor_factory.cpp"
-#include "submodules/file_writer.cpp"
 #include "submodules/grid_generator.cpp"
+#include "submodules/probes.cpp"
 
 namespace gpu
 {
@@ -27,13 +23,9 @@ namespace gpu
         actuator_line::makeModule(gpuModule);
         boundary_conditions::makeModule(gpuModule);
         communicator::makeModule(gpuModule); 
-        grid_builder::makeModule(gpuModule);
         cuda_memory_manager::makeModule(gpuModule);
         grid_provider::makeModule(gpuModule);
         probes::makeModule(gpuModule);
-        kernel_factory::makeModule(gpuModule);
-        pre_processor_factory::makeModule(gpuModule);
-        file_writer::makeModule(gpuModule);
         grid_generator::makeModule(gpuModule);
         return gpuModule;
     }
diff --git a/pythonbindings/src/gpu/submodules/cuda_memory_manager.cpp b/pythonbindings/src/gpu/submodules/cuda_memory_manager.cpp
index f337aeb306fd1836e5bc2962e85f45d593185836..bf27080cb3cd050343ba42b0571827ed58870cfd 100644
--- a/pythonbindings/src/gpu/submodules/cuda_memory_manager.cpp
+++ b/pythonbindings/src/gpu/submodules/cuda_memory_manager.cpp
@@ -10,6 +10,6 @@ namespace cuda_memory_manager
     void makeModule(py::module_ &parentModule)
     {
         py::class_<CudaMemoryManager, std::shared_ptr<CudaMemoryManager>>(parentModule, "CudaMemoryManager")
-        .def("make", &CudaMemoryManager::make, py::return_value_policy::reference);
+        .def(py::init<std::shared_ptr<Parameter>>(), "parameter");
     }
 }
\ No newline at end of file
diff --git a/pythonbindings/src/gpu/submodules/file_writer.cpp b/pythonbindings/src/gpu/submodules/file_writer.cpp
deleted file mode 100644
index 2ad90fe7381be215b2d257b5be99caf25db1e0ae..0000000000000000000000000000000000000000
--- a/pythonbindings/src/gpu/submodules/file_writer.cpp
+++ /dev/null
@@ -1,17 +0,0 @@
-#include <pybind11/pybind11.h>
-#include <gpu/VirtualFluids_GPU/Output/FileWriter.h>
-#include <gpu/VirtualFluids_GPU/Output/DataWriter.h>
-
-
-namespace file_writer
-{
-    namespace py = pybind11;
-
-    void makeModule(py::module_ &parentModule)
-    {
-        py::class_<DataWriter, std::shared_ptr<DataWriter>>(parentModule, "_DataWriter");
-
-        py::class_<FileWriter, DataWriter, std::shared_ptr<FileWriter>>(parentModule, "FileWriter")
-        .def(py::init<>());
-    }
-}
\ No newline at end of file
diff --git a/pythonbindings/src/gpu/submodules/grid_builder.cpp b/pythonbindings/src/gpu/submodules/grid_builder.cpp
deleted file mode 100644
index 09241f71cbe9cd013600c34e4d0a07e53a1bc79f..0000000000000000000000000000000000000000
--- a/pythonbindings/src/gpu/submodules/grid_builder.cpp
+++ /dev/null
@@ -1,36 +0,0 @@
-#include <pybind11/pybind11.h>
-#include "gpu/GridGenerator/grid/GridBuilder/MultipleGridBuilder.h"
-#include "gpu/GridGenerator/grid/GridBuilder/GridBuilder.h"
-#include "gpu/GridGenerator/grid/GridBuilder/LevelGridBuilder.h"
-#include "gpu/GridGenerator/geometries/Object.h"
-#include "gpu/GridGenerator/grid/BoundaryConditions/Side.h"
-
-namespace grid_builder
-{
-    namespace py = pybind11;
-
-    void makeModule(py::module_ &parentModule)
-    {        
-        py::class_<GridBuilder, std::shared_ptr<GridBuilder>>(parentModule, "GridBuilder")
-        .def("get_number_of_grid_levels", &GridBuilder::getNumberOfGridLevels)
-        .def("get_grid", &GridBuilder::getGrid);
-
-        py::class_<LevelGridBuilder, GridBuilder, std::shared_ptr<LevelGridBuilder>>(parentModule, "LevelGridBuilder")
-        .def("get_grid", py::overload_cast<int, int>(&LevelGridBuilder::getGrid))
-        .def("set_slip_boundary_condition", &LevelGridBuilder::setSlipBoundaryCondition)
-        .def("set_velocity_boundary_condition", &LevelGridBuilder::setVelocityBoundaryCondition)
-        .def("set_pressure_boundary_condition", &LevelGridBuilder::setPressureBoundaryCondition)
-        .def("set_periodic_boundary_condition", &LevelGridBuilder::setPeriodicBoundaryCondition)
-        .def("set_no_slip_boundary_condition", &LevelGridBuilder::setNoSlipBoundaryCondition);
-
-        py::class_<MultipleGridBuilder, LevelGridBuilder, std::shared_ptr<MultipleGridBuilder>>(parentModule, "MultipleGridBuilder")
-        .def("make_shared", &MultipleGridBuilder::makeShared, py::return_value_policy::reference)
-        .def("add_coarse_grid", &MultipleGridBuilder::addCoarseGrid)
-        .def("add_grid", py::overload_cast<Object*>(&MultipleGridBuilder::addGrid))
-        .def("add_grid", py::overload_cast<Object*, uint>(&MultipleGridBuilder::addGrid))
-        .def("add_geometry", py::overload_cast<Object*>(&MultipleGridBuilder::addGeometry))
-        .def("add_geometry", py::overload_cast<Object*, uint>(&MultipleGridBuilder::addGeometry))
-        .def("get_number_of_levels", &MultipleGridBuilder::getNumberOfLevels)
-        .def("build_grids", &MultipleGridBuilder::buildGrids);
-    }
-}
\ No newline at end of file
diff --git a/pythonbindings/src/gpu/submodules/grid_generator.cpp b/pythonbindings/src/gpu/submodules/grid_generator.cpp
index b9babe1d6b45f69d516a4cd57377eaa6c1b37149..579c06c4e00cae9646ced8b554d71631eeb7e793 100644
--- a/pythonbindings/src/gpu/submodules/grid_generator.cpp
+++ b/pythonbindings/src/gpu/submodules/grid_generator.cpp
@@ -5,6 +5,10 @@
 #include "gpu/GridGenerator/geometries/Cuboid/Cuboid.h"
 #include "gpu/GridGenerator/geometries/Sphere/Sphere.h"
 #include "gpu/GridGenerator/geometries/TriangularMesh/TriangularMesh.h"
+#include "gpu/GridGenerator/grid/GridFactory.h"
+#include "gpu/GridGenerator/grid/GridBuilder/GridBuilder.h"
+#include "gpu/GridGenerator/grid/GridBuilder/LevelGridBuilder.h"
+#include "gpu/GridGenerator/grid/GridBuilder/MultipleGridBuilder.h"
 
 namespace grid_generator
 {
@@ -13,6 +17,9 @@ namespace grid_generator
     {  
         py::module gridGeneratorModule = parentModule.def_submodule("grid_generator");
 
+        py::class_<GridFactory, std::shared_ptr<GridFactory>>(gridGeneratorModule, "GridFactory")
+        .def("make", &GridFactory::make, py::return_value_policy::reference);
+
         py::class_<BoundingBox>(gridGeneratorModule, "BoundingBox")
         .def(py::init<real, real, real, real, real, real>(),"min_x","max_x","min_y","max_y","min_z","max_z");
 
@@ -33,6 +40,29 @@ namespace grid_generator
         py::class_<TriangularMesh, Object, std::shared_ptr<TriangularMesh>>(gridGeneratorModule, "TriangularMesh")
         .def("make", &TriangularMesh::make, py::return_value_policy::reference);
 
+        py::class_<GridBuilder, std::shared_ptr<GridBuilder>>(gridGeneratorModule, "GridBuilder")
+        .def("get_number_of_grid_levels", &GridBuilder::getNumberOfGridLevels)
+        .def("get_grid", &GridBuilder::getGrid);
+
+        py::class_<LevelGridBuilder, GridBuilder, std::shared_ptr<LevelGridBuilder>>(gridGeneratorModule, "LevelGridBuilder")
+        .def("get_grid", py::overload_cast<int, int>(&LevelGridBuilder::getGrid))
+        .def("set_slip_boundary_condition", &LevelGridBuilder::setSlipBoundaryCondition)
+        .def("set_velocity_boundary_condition", &LevelGridBuilder::setVelocityBoundaryCondition)
+        .def("set_pressure_boundary_condition", &LevelGridBuilder::setPressureBoundaryCondition)
+        .def("set_periodic_boundary_condition", &LevelGridBuilder::setPeriodicBoundaryCondition)
+        .def("set_no_slip_boundary_condition", &LevelGridBuilder::setNoSlipBoundaryCondition)
+        .def("set_stress_boundary_condition", &LevelGridBuilder::setStressBoundaryCondition);
+
+        py::class_<MultipleGridBuilder, LevelGridBuilder, std::shared_ptr<MultipleGridBuilder>>(gridGeneratorModule, "MultipleGridBuilder")
+        .def("make_shared", &MultipleGridBuilder::makeShared, py::return_value_policy::reference)
+        .def("add_coarse_grid", &MultipleGridBuilder::addCoarseGrid)
+        .def("add_grid", py::overload_cast<Object*>(&MultipleGridBuilder::addGrid))
+        .def("add_grid", py::overload_cast<Object*, uint>(&MultipleGridBuilder::addGrid))
+        .def("add_geometry", py::overload_cast<Object*>(&MultipleGridBuilder::addGeometry))
+        .def("add_geometry", py::overload_cast<Object*, uint>(&MultipleGridBuilder::addGeometry))
+        .def("get_number_of_levels", &MultipleGridBuilder::getNumberOfLevels)
+        .def("build_grids", &MultipleGridBuilder::buildGrids);
+
         return gridGeneratorModule;
     }
 }
diff --git a/pythonbindings/src/gpu/submodules/grid_provider.cpp b/pythonbindings/src/gpu/submodules/grid_provider.cpp
index 5a5514dd8b14d24f4818740e115c8504bc973726..02ff273e2cd1a2022943e19c9a48a447d9dfe54b 100644
--- a/pythonbindings/src/gpu/submodules/grid_provider.cpp
+++ b/pythonbindings/src/gpu/submodules/grid_provider.cpp
@@ -1,8 +1,8 @@
 #include <pybind11/pybind11.h>
 #include "gpu/VirtualFluids_GPU/DataStructureInitializer/GridProvider.h"
-#include <gpu/VirtualFluids_GPU/GPU/CudaMemoryManager.h>
-#include <gpu/VirtualFluids_GPU/Parameter/Parameter.h>
-#include "gpu/GridGenerator/grid/GridBuilder/GridBuilder.h"
+// #include <gpu/VirtualFluids_GPU/GPU/CudaMemoryManager.h>
+// #include <gpu/VirtualFluids_GPU/Parameter/Parameter.h>
+// #include "gpu/GridGenerator/grid/GridBuilder/GridBuilder.h"
 
 namespace grid_provider
 {
diff --git a/pythonbindings/src/gpu/submodules/kernel_factory.cpp b/pythonbindings/src/gpu/submodules/kernel_factory.cpp
deleted file mode 100644
index af710948aa75900ba9f4051360ec26dd8510118a..0000000000000000000000000000000000000000
--- a/pythonbindings/src/gpu/submodules/kernel_factory.cpp
+++ /dev/null
@@ -1,16 +0,0 @@
-#include <pybind11/pybind11.h>
-#include <gpu/VirtualFluids_GPU/Kernel/Utilities/KernelFactory/KernelFactoryImp.h>
-#include <gpu/VirtualFluids_GPU/Kernel/Utilities/KernelFactory/KernelFactory.h>
-
-namespace kernel_factory
-{
-    namespace py = pybind11;
-
-    void makeModule(py::module_ &parentModule)
-    {
-        py::class_<KernelFactory, std::shared_ptr<KernelFactory>>(parentModule, "_KernelFactory");
-        
-        py::class_<KernelFactoryImp, KernelFactory, std::shared_ptr<KernelFactoryImp>>(parentModule, "KernelFactory")
-        .def("get_instance", &KernelFactoryImp::getInstance, py::return_value_policy::reference);
-    }
-}
\ No newline at end of file
diff --git a/pythonbindings/src/gpu/submodules/parameter.cpp b/pythonbindings/src/gpu/submodules/parameter.cpp
index 626693fd0ee32ce6866f67f54a71fe414f0f9c8e..0ea87924bd0a2fc2b3a6bc343cad5948febe2ffe 100644
--- a/pythonbindings/src/gpu/submodules/parameter.cpp
+++ b/pythonbindings/src/gpu/submodules/parameter.cpp
@@ -67,6 +67,11 @@ namespace parameter
         .def("get_viscosity_ratio", &Parameter::getViscosityRatio)
         .def("get_density_ratio", &Parameter::getDensityRatio)
         .def("get_force_ratio", &Parameter::getForceRatio)
+        .def("get_use_AMD", &Parameter::getUseAMD)
+        .def("get_use_Wale", &Parameter::getUseWale)
+        .def("get_SGS_constant", &Parameter::getSGSConstant)
+        .def("get_is_body_force", &Parameter::getIsBodyForce)
+        .def("set_has_wall_model_monitor", &Parameter::setHasWallModelMonitor)
         ;
     }
 }
\ No newline at end of file
diff --git a/pythonbindings/src/gpu/submodules/pre_processor_factory.cpp b/pythonbindings/src/gpu/submodules/pre_processor_factory.cpp
deleted file mode 100644
index b76ae285b413594847f6672e7f3fc0e656da3cec..0000000000000000000000000000000000000000
--- a/pythonbindings/src/gpu/submodules/pre_processor_factory.cpp
+++ /dev/null
@@ -1,16 +0,0 @@
-#include <pybind11/pybind11.h>
-#include <gpu/VirtualFluids_GPU/PreProcessor/PreProcessorFactory/PreProcessorFactory.h>
-#include <gpu/VirtualFluids_GPU/PreProcessor/PreProcessorFactory/PreProcessorFactoryImp.h>
-
-namespace pre_processor_factory
-{
-    namespace py = pybind11;
-
-    void makeModule(py::module_ &parentModule)
-    {
-        py::class_<PreProcessorFactory, std::shared_ptr<PreProcessorFactory>>(parentModule, "_PreProcessorFactory");
-
-        py::class_<PreProcessorFactoryImp, PreProcessorFactory, std::shared_ptr<PreProcessorFactoryImp>>(parentModule, "PreProcessorFactory")
-        .def("get_instance", &PreProcessorFactoryImp::getInstance, py::return_value_policy::reference);
-    }
-}
\ No newline at end of file
diff --git a/pythonbindings/src/gpu/submodules/probes.cpp b/pythonbindings/src/gpu/submodules/probes.cpp
index 9b751a513d23a8a17e4e9981ababc720fc7346b0..6993d9617d870922d7ed90ed9ecbebb8a797be25 100644
--- a/pythonbindings/src/gpu/submodules/probes.cpp
+++ b/pythonbindings/src/gpu/submodules/probes.cpp
@@ -3,6 +3,8 @@
 #include <gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/Probe.h>
 #include <gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/PointProbe.h>
 #include <gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/PlaneProbe.h>
+#include <gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/WallModelProbe.h>
+#include <gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/PlanarAverageProbe.h>
 #include <gpu/VirtualFluids_GPU/PreCollisionInteractor/PreCollisionInteractor.h>
 
 namespace probes
@@ -13,13 +15,23 @@ namespace probes
     {
         py::module probeModule = parentModule.def_submodule("probes");
 
-        py::enum_<PostProcessingVariable>(probeModule, "PostProcessingVariable")
-        .value("Instantaneous", PostProcessingVariable::Instantaneous)
-        .value("Means", PostProcessingVariable::Means)
-        .value("Variances", PostProcessingVariable::Variances);
+        py::enum_<Statistic>(probeModule, "Statistic")
+        .value("Instantaneous", Statistic::Instantaneous)
+        .value("Means", Statistic::Means)
+        .value("Variances", Statistic::Variances)
+        .value("SpatialMeans", Statistic::SpatialMeans)
+        .value("SpatioTemporalMeans", Statistic::SpatioTemporalMeans)
+        .value("SpatialCovariances", Statistic::SpatialCovariances)
+        .value("SpatioTemporalCovariances", Statistic::SpatioTemporalCovariances)
+        .value("SpatialSkewness", Statistic::SpatialSkewness)
+        .value("SpatioTemporalSkewness", Statistic::SpatioTemporalSkewness)
+        .value("SpatialFlatness", Statistic::SpatialFlatness)
+        .value("SpatioTemporalFlatness", Statistic::SpatioTemporalFlatness);
 
         py::class_<Probe, PreCollisionInteractor, std::shared_ptr<Probe>>(probeModule, "Probe")
-        .def("add_post_processing_variable", &Probe::addPostProcessingVariable);
+        .def("add_statistic", &Probe::addStatistic)
+        .def("set_file_name_to_n_out", &Probe::setFileNameToNOut)
+        .def("add_all_available_statistics", &Probe::addAllAvailableStatistics);
 
         py::class_<PointProbe, Probe, std::shared_ptr<PointProbe>>(probeModule, "PointProbe")
         .def(py::init<
@@ -27,10 +39,12 @@ namespace probes
                         const std::string,
                         uint,
                         uint, 
+                        uint,
                         uint>(), 
                         "probe_name",
                         "output_path"
                         "t_start_avg",
+                        "t_avg",
                         "t_start_out",
                         "t_out")
         .def("add_probe_points_from_list", &PointProbe::addProbePointsFromList)
@@ -42,14 +56,55 @@ namespace probes
                         const std::string,
                         uint,
                         uint, 
+                        uint,
                         uint>(), 
                         "probe_name",
                         "output_path"
                         "t_start_avg",
+                        "t_avg",
                         "t_start_out",
                         "t_out")
         .def("set_probe_plane", &PlaneProbe::setProbePlane);
 
+        py::class_<PlanarAverageProbe, Probe, std::shared_ptr<PlanarAverageProbe>>(probeModule, "PlanarAverageProbe")
+        .def(py::init<
+                        const std::string,
+                        const std::string,
+                        uint,
+                        uint,
+                        uint,
+                        uint,
+                        uint,
+                        char>(),
+                        "probe_name",
+                        "output_path",
+                        "t_start_avg",
+                        "t_start_tmp_avg",
+                        "t_avg",
+                        "t_start_out",
+                        "t_out",
+                        "plane_normal");
+
+
+        py::class_<WallModelProbe, Probe, std::shared_ptr<WallModelProbe>>(probeModule, "WallModelProbe")
+        .def(py::init<
+                        const std::string,
+                        const std::string,
+                        uint,
+                        uint, 
+                        uint,
+                        uint,
+                        uint>(), 
+                        "probe_name",
+                        "output_path",
+                        "t_start_avg",
+                        "t_start_tmp_avg",
+                        "t_avg",
+                        "t_start_out",
+                        "t_out")
+        .def("set_force_output_to_stress", &WallModelProbe::setForceOutputToStress)
+        .def("set_evaluate_pressure_gradient", &WallModelProbe::setEvaluatePressureGradient);
+
         return probeModule;
     }
 }
\ No newline at end of file
diff --git a/pythonbindings/src/gpu/submodules/simulation.cpp b/pythonbindings/src/gpu/submodules/simulation.cpp
index 9683e8768417ad2422fb1a53ef6a428543bceffc..b775d604ba41530223f22738c72785b2c15348b3 100644
--- a/pythonbindings/src/gpu/submodules/simulation.cpp
+++ b/pythonbindings/src/gpu/submodules/simulation.cpp
@@ -15,11 +15,18 @@ namespace simulation
 
     void makeModule(py::module_ &parentModule)
     {
+        // setFactories and setDataWriter are intentionally not bound: they take std::unique_ptr arguments, which pybind11 cannot accept from Python.
         py::class_<Simulation>(parentModule, "Simulation")
-        .def(py::init<vf::gpu::Communicator&>(), "communicator")
-        .def("set_factories", &Simulation::setFactories)
-        .def("init", &Simulation::init)
+        .def(py::init<  std::shared_ptr<Parameter>,
+                        std::shared_ptr<CudaMemoryManager>,
+                        vf::gpu::Communicator &,
+                        GridProvider &>(), 
+                        "parameter",
+                        "memoryManager",
+                        "communicator",
+                        "gridProvider")
         .def("run", &Simulation::run)
-        .def("free", &Simulation::free);
+        .def("addKineticEnergyAnalyzer", &Simulation::addKineticEnergyAnalyzer)
+        .def("addEnstrophyAnalyzer", &Simulation::addEnstrophyAnalyzer);
     }
 }
\ No newline at end of file
diff --git a/src/gpu/VirtualFluids_GPU/Calculation/Calc2ndMoments.h b/src/gpu/VirtualFluids_GPU/Calculation/Calc2ndMoments.h
index 9b05b9baae1ba13bfcaf6848d1855d7273ca6d8b..42997be82687ab480a2d4d45b0793ba307e2ebf4 100644
--- a/src/gpu/VirtualFluids_GPU/Calculation/Calc2ndMoments.h
+++ b/src/gpu/VirtualFluids_GPU/Calculation/Calc2ndMoments.h
@@ -7,18 +7,18 @@
 #include "GPU/CudaMemoryManager.h"
 
 //2nd
-extern "C" void alloc2ndMoments(Parameter* para, CudaMemoryManager* cudaMemoryManager);
-extern "C" void init2ndMoments(Parameter* para);
-extern "C" void calc2ndMoments(Parameter* para, CudaMemoryManager* cudaMemoryManager);
+void alloc2ndMoments(Parameter* para, CudaMemoryManager* cudaMemoryManager);
+void init2ndMoments(Parameter* para);
+void calc2ndMoments(Parameter* para, CudaMemoryManager* cudaMemoryManager);
 
 //3rd
-extern "C" void alloc3rdMoments(Parameter* para, CudaMemoryManager* cudaMemoryManager);
-extern "C" void init3rdMoments(Parameter* para);
-extern "C" void calc3rdMoments(Parameter* para, CudaMemoryManager* cudaMemoryManager);
+void alloc3rdMoments(Parameter* para, CudaMemoryManager* cudaMemoryManager);
+void init3rdMoments(Parameter* para);
+void calc3rdMoments(Parameter* para, CudaMemoryManager* cudaMemoryManager);
 
 //higher order
-extern "C" void allocHigherOrderMoments(Parameter* para, CudaMemoryManager* cudaMemoryManager);
-extern "C" void initHigherOrderMoments(Parameter* para);
-extern "C" void calcHigherOrderMoments(Parameter* para, CudaMemoryManager* cudaMemoryManager);
+void allocHigherOrderMoments(Parameter* para, CudaMemoryManager* cudaMemoryManager);
+void initHigherOrderMoments(Parameter* para);
+void calcHigherOrderMoments(Parameter* para, CudaMemoryManager* cudaMemoryManager);
 
 #endif
diff --git a/src/gpu/VirtualFluids_GPU/Calculation/CalcMedian.h b/src/gpu/VirtualFluids_GPU/Calculation/CalcMedian.h
index 71b7b7afdc5a0508d030e1698f352cd7686b96dc..262c22a1a557bfdd6aefaee492d2f8351f166599 100644
--- a/src/gpu/VirtualFluids_GPU/Calculation/CalcMedian.h
+++ b/src/gpu/VirtualFluids_GPU/Calculation/CalcMedian.h
@@ -6,9 +6,9 @@
 #include "Parameter/Parameter.h"
 #include "GPU/CudaMemoryManager.h"
 
-extern "C" void allocMedian(Parameter* para, CudaMemoryManager* cudaMemoryManager);
-extern "C" void allocMedianAD(Parameter* para, CudaMemoryManager* cudaMemoryManager);
-extern "C" void calcMedian(Parameter* para, unsigned int tdiff);
-extern "C" void resetMedian(Parameter* para);
+void allocMedian(Parameter* para, CudaMemoryManager* cudaMemoryManager);
+void allocMedianAD(Parameter* para, CudaMemoryManager* cudaMemoryManager);
+void calcMedian(Parameter* para, unsigned int tdiff);
+void resetMedian(Parameter* para);
 
 #endif
diff --git a/src/gpu/VirtualFluids_GPU/Calculation/CalcTurbulenceIntensity.h b/src/gpu/VirtualFluids_GPU/Calculation/CalcTurbulenceIntensity.h
index 6893d6dcb4f4c35cc55fda6ad3dbdf93c0bd2a83..f70973eb5921a17c3229a026623de2a0ef9f3ce4 100644
--- a/src/gpu/VirtualFluids_GPU/Calculation/CalcTurbulenceIntensity.h
+++ b/src/gpu/VirtualFluids_GPU/Calculation/CalcTurbulenceIntensity.h
@@ -6,11 +6,11 @@
 #include "Parameter/Parameter.h"
 #include "GPU/CudaMemoryManager.h"
 
-extern "C" void allocTurbulenceIntensity(Parameter *para, CudaMemoryManager *cudaMemoryManager);
-extern "C" void calcVelocityAndFluctuations(Parameter *para, CudaMemoryManager *cudaMemoryManager, uint tdiff);
-extern "C" void calcTurbulenceIntensity(Parameter *para, CudaMemoryManager *cudaMemoryManager, uint tdiff);
-extern "C" void resetVelocityFluctuationsAndMeans(Parameter *para, CudaMemoryManager *cudaMemoryManager);
-extern "C" void cudaFreeTurbulenceIntensityArrays(Parameter *para, CudaMemoryManager *cudaMemoryManager);
+void allocTurbulenceIntensity(Parameter *para, CudaMemoryManager *cudaMemoryManager);
+void calcVelocityAndFluctuations(Parameter *para, CudaMemoryManager *cudaMemoryManager, uint tdiff);
+void calcTurbulenceIntensity(Parameter *para, CudaMemoryManager *cudaMemoryManager, uint tdiff);
+void resetVelocityFluctuationsAndMeans(Parameter *para, CudaMemoryManager *cudaMemoryManager);
+void cudaFreeTurbulenceIntensityArrays(Parameter *para, CudaMemoryManager *cudaMemoryManager);
 
 
 void writeTurbulenceIntensityToFile(Parameter *para, uint timestep);
diff --git a/src/gpu/VirtualFluids_GPU/Calculation/Cp.cpp b/src/gpu/VirtualFluids_GPU/Calculation/Cp.cpp
index 08901207f6b6d132a13666bb41dd2cc0508f8724..8c397326c59cff9b0ca9870d118faa3e8a4527c5 100644
--- a/src/gpu/VirtualFluids_GPU/Calculation/Cp.cpp
+++ b/src/gpu/VirtualFluids_GPU/Calculation/Cp.cpp
@@ -370,7 +370,7 @@ void printCaseFile(Parameter* para)
 
 
 
-extern "C" void printGeoFile(Parameter* para, bool fileFormat)
+void printGeoFile(Parameter* para, bool fileFormat)
 {
 	//////////////////////////////////////////////////////////////////////////
 	//set filename geo
@@ -545,7 +545,7 @@ extern "C" void printGeoFile(Parameter* para, bool fileFormat)
 
 
 
-extern "C" void printScalars(Parameter* para, bool fileFormat)
+void printScalars(Parameter* para, bool fileFormat)
 {
 	//////////////////////////////////////////////////////////////////////////
 	//set filename scalar
diff --git a/src/gpu/VirtualFluids_GPU/Calculation/Cp.h b/src/gpu/VirtualFluids_GPU/Calculation/Cp.h
index 5bb4b9c3cc81381fd3b9fb69f97636d53c8f39ee..bc70e1ae093269f038e699c0c5ce66cee63c0b12 100644
--- a/src/gpu/VirtualFluids_GPU/Calculation/Cp.h
+++ b/src/gpu/VirtualFluids_GPU/Calculation/Cp.h
@@ -6,23 +6,23 @@
 #include "Parameter/Parameter.h"
 #include "GPU/CudaMemoryManager.h"
 
-extern "C" void calcCp(Parameter* para, CudaMemoryManager* cudaMemoryManager, int lev);
-extern "C" void printCpTopIntermediateStep(Parameter* para, unsigned int t, int lev);
-extern "C" void printCpTop(Parameter* para, CudaMemoryManager* cudaMemoryManager, int lev);
-extern "C" void printCpBottom(Parameter* para, CudaMemoryManager* cudaMemoryManager);
-extern "C" void printCpBottom2(Parameter* para, CudaMemoryManager* cudaMemoryManager);
+void calcCp(Parameter* para, CudaMemoryManager* cudaMemoryManager, int lev);
+void printCpTopIntermediateStep(Parameter* para, unsigned int t, int lev);
+void printCpTop(Parameter* para, CudaMemoryManager* cudaMemoryManager, int lev);
+void printCpBottom(Parameter* para, CudaMemoryManager* cudaMemoryManager);
+void printCpBottom2(Parameter* para, CudaMemoryManager* cudaMemoryManager);
 
 
 
-extern "C" void excludeGridInterfaceNodesForMirror(Parameter* para, int lev);
-extern "C" void calcPressForMirror(Parameter* para, CudaMemoryManager* cudaMemoryManager, int lev);
+void excludeGridInterfaceNodesForMirror(Parameter* para, int lev);
+void calcPressForMirror(Parameter* para, CudaMemoryManager* cudaMemoryManager, int lev);
 //Ensight Gold
-extern "C" void printCaseFile(Parameter* para);
-extern "C" void printGeoFile(Parameter* para, bool fileFormat);
-extern "C" void printScalars(Parameter* para, bool fileFormat);
+void printCaseFile(Parameter* para);
+void printGeoFile(Parameter* para, bool fileFormat);
+void printScalars(Parameter* para, bool fileFormat);
 //functions to write binary files
-extern "C" void writeIntToFile(const int &i, std::ofstream &ofile);
-extern "C" void writeFloatToFile(const float &f, std::ofstream &ofile);
-extern "C" void writeStringToFile(const std::string &s, std::ofstream &ofile);
+void writeIntToFile(const int &i, std::ofstream &ofile);
+void writeFloatToFile(const float &f, std::ofstream &ofile);
+void writeStringToFile(const std::string &s, std::ofstream &ofile);
 
 #endif
diff --git a/src/gpu/VirtualFluids_GPU/Calculation/DragLift.h b/src/gpu/VirtualFluids_GPU/Calculation/DragLift.h
index ab531e576d7666e233c345ecf64338be149cafd5..8be15d423b65e0fdffc3a5af44e7dc5dbdbf4e6a 100644
--- a/src/gpu/VirtualFluids_GPU/Calculation/DragLift.h
+++ b/src/gpu/VirtualFluids_GPU/Calculation/DragLift.h
@@ -6,8 +6,8 @@
 #include "Parameter/Parameter.h"
 #include "GPU/CudaMemoryManager.h"
 
-extern "C" void calcDragLift(Parameter* para, CudaMemoryManager* cudaMemoryManager, int lev);
-extern "C" void allocDragLift(Parameter* para, CudaMemoryManager* cudaMemoryManager);
-extern "C" void printDragLift(Parameter* para, CudaMemoryManager* cudaMemoryManager, int timestep);
+void calcDragLift(Parameter* para, CudaMemoryManager* cudaMemoryManager, int lev);
+void allocDragLift(Parameter* para, CudaMemoryManager* cudaMemoryManager);
+void printDragLift(Parameter* para, CudaMemoryManager* cudaMemoryManager, int timestep);
 
 #endif
diff --git a/src/gpu/VirtualFluids_GPU/Calculation/ForceCalculations.h b/src/gpu/VirtualFluids_GPU/Calculation/ForceCalculations.h
index 684b3e9946e3e3e693d7cd36c0bb6445382b1b92..4506d23abd7068697a089c926d684406af789aef 100644
--- a/src/gpu/VirtualFluids_GPU/Calculation/ForceCalculations.h
+++ b/src/gpu/VirtualFluids_GPU/Calculation/ForceCalculations.h
@@ -35,8 +35,8 @@ private:
 };
 
 
-//extern "C" void calcVeloForce(Parameter* para);
-//extern "C" void allocVeloForForcing(Parameter* para);
-//extern "C" void printForcing(Parameter* para);
+//void calcVeloForce(Parameter* para);
+//void allocVeloForForcing(Parameter* para);
+//void printForcing(Parameter* para);
 
 #endif /* FORCE_CALCULATIONS_H */
diff --git a/src/gpu/VirtualFluids_GPU/Calculation/PlaneCalculations.h b/src/gpu/VirtualFluids_GPU/Calculation/PlaneCalculations.h
index 47443169829e5bdd1d6c6ed31eaca1c259c783b7..50f49b85df2a87e3921ac7133630c128da0caebd 100644
--- a/src/gpu/VirtualFluids_GPU/Calculation/PlaneCalculations.h
+++ b/src/gpu/VirtualFluids_GPU/Calculation/PlaneCalculations.h
@@ -8,15 +8,15 @@
 #include <iostream>
 #include <stdio.h>
 
-extern "C" void setSizeOfPlane(Parameter* para, int lev, unsigned int z);
-extern "C" void calcPressure(Parameter* para, std::string inorout, int lev);
-extern "C" void calcFlowRate(Parameter* para, int lev);
+void setSizeOfPlane(Parameter* para, int lev, unsigned int z);
+void calcPressure(Parameter* para, std::string inorout, int lev);
+void calcFlowRate(Parameter* para, int lev);
 
 //advection + diffusion
-extern "C" void calcPlaneConc(Parameter* para, CudaMemoryManager* cudaMemoryManager, int lev);
-extern "C" void allocPlaneConc(Parameter* para, CudaMemoryManager* cudaMemoryManager);
-extern "C" void printPlaneConc(Parameter* para, CudaMemoryManager* cudaMemoryManager);
+void calcPlaneConc(Parameter* para, CudaMemoryManager* cudaMemoryManager, int lev);
+void allocPlaneConc(Parameter* para, CudaMemoryManager* cudaMemoryManager);
+void printPlaneConc(Parameter* para, CudaMemoryManager* cudaMemoryManager);
 
-extern "C" void printRE(Parameter* para, CudaMemoryManager* cudaMemoryManager, int timestep);
+void printRE(Parameter* para, CudaMemoryManager* cudaMemoryManager, int timestep);
 
 #endif
diff --git a/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.h b/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.h
index 661a1aafde6b392eb9568036b46d2adc62677739..c0b1df930c7fadaa4a6a3161d83c1780266284cf 100644
--- a/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.h
+++ b/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.h
@@ -73,26 +73,26 @@ private:
 
 
 
-extern "C" void prepareExchangeMultiGPU(Parameter *para, int level, int streamIndex);
-extern "C" void prepareExchangeMultiGPUAfterFtoC(Parameter *para, int level, int streamIndex);
+void prepareExchangeMultiGPU(Parameter *para, int level, int streamIndex);
+void prepareExchangeMultiGPUAfterFtoC(Parameter *para, int level, int streamIndex);
 
-extern "C" void exchangeMultiGPU(Parameter *para, vf::gpu::Communicator &comm, CudaMemoryManager *cudaManager,
+void exchangeMultiGPU(Parameter *para, vf::gpu::Communicator &comm, CudaMemoryManager *cudaManager,
                                  int level, int streamIndex);
-extern "C" void exchangeMultiGPUAfterFtoC(Parameter *para, vf::gpu::Communicator &comm, CudaMemoryManager *cudaManager,
+void exchangeMultiGPUAfterFtoC(Parameter *para, vf::gpu::Communicator &comm, CudaMemoryManager *cudaManager,
                                  int level, int streamIndex);
-extern "C" void exchangeMultiGPU_noStreams_withPrepare(Parameter *para, vf::gpu::Communicator &comm,
+void exchangeMultiGPU_noStreams_withPrepare(Parameter *para, vf::gpu::Communicator &comm,
                                                        CudaMemoryManager *cudaManager, int level, bool useReducedComm);
 
 
 
-extern "C" void swapBetweenEvenAndOddTimestep(Parameter* para, int level);
+void swapBetweenEvenAndOddTimestep(Parameter* para, int level);
 
-extern "C" void calcMacroscopicQuantities(Parameter* para, int level);
+void calcMacroscopicQuantities(Parameter* para, int level);
 
-extern "C" void calcTurbulentViscosity(Parameter* para, int level);
+void calcTurbulentViscosity(Parameter* para, int level);
 
-extern "C" void interactWithActuators(Parameter* para, CudaMemoryManager* cudaManager, int level, unsigned int t);
+void interactWithActuators(Parameter* para, CudaMemoryManager* cudaManager, int level, unsigned int t);
 
-extern "C" void interactWithProbes(Parameter* para, CudaMemoryManager* cudaManager, int level, unsigned int t);
+void interactWithProbes(Parameter* para, CudaMemoryManager* cudaManager, int level, unsigned int t);
 
 #endif
diff --git a/src/gpu/VirtualFluids_GPU/Communication/ExchangeData27.h b/src/gpu/VirtualFluids_GPU/Communication/ExchangeData27.h
index 78e62708474b0e498e51c161f0821627fcfff3de..ec930ebbc06554e948204b74e79e0e25b85f57b5 100644
--- a/src/gpu/VirtualFluids_GPU/Communication/ExchangeData27.h
+++ b/src/gpu/VirtualFluids_GPU/Communication/ExchangeData27.h
@@ -14,9 +14,9 @@
 
 //////////////////////////////////////////////////////////////////////////
 // 1D domain decomposition
-extern "C" void exchangePreCollDataGPU27(Parameter *para, vf::gpu::Communicator &comm, CudaMemoryManager *cudaMemoryManager,
+void exchangePreCollDataGPU27(Parameter *para, vf::gpu::Communicator &comm, CudaMemoryManager *cudaMemoryManager,
                                          int level);
-extern "C" void exchangePostCollDataGPU27(Parameter *para, vf::gpu::Communicator &comm, CudaMemoryManager *cudaMemoryManager,
+void exchangePostCollDataGPU27(Parameter *para, vf::gpu::Communicator &comm, CudaMemoryManager *cudaMemoryManager,
                                           int level);
 //////////////////////////////////////////////////////////////////////////
 // 3D domain decomposition
@@ -24,11 +24,11 @@ extern "C" void exchangePostCollDataGPU27(Parameter *para, vf::gpu::Communicator
 // functions used for all directions
 
 //! \brief Collect the send nodes in a buffer on the gpu
-extern "C" void collectNodesInSendBufferGPU(Parameter *para, int level, int streamIndex,
+void collectNodesInSendBufferGPU(Parameter *para, int level, int streamIndex,
                                             std::vector<ProcessNeighbor27> *sendProcessNeighbor,
                                             unsigned int numberOfSendProcessNeighbors);
 //! \brief Distribute the receive nodes from the buffer on the gpu
-extern "C" void scatterNodesFromRecvBufferGPU(Parameter *para, int level, int streamIndex,
+void scatterNodesFromRecvBufferGPU(Parameter *para, int level, int streamIndex,
                                               std::vector<ProcessNeighbor27> *recvProcessNeighborDev,
                                               unsigned int numberOfRecvProcessNeighbors);
 //! \brief Copy nodes which are part of the communication in multiple directions
@@ -40,7 +40,7 @@ extern "C" void scatterNodesFromRecvBufferGPU(Parameter *para, int level, int st
 //! copied 
 //! \param recvProcessNeighborHost is a reference to the receive buffer on the host, nodes are copied from here
 //! \param sendProcessNeighborHost is a reference to the send buffer on the host, nodes are copied to here
-extern "C" void copyEdgeNodes(std::vector<LBMSimulationParameter::EdgeNodePositions> &edgeNodes,
+void copyEdgeNodes(std::vector<LBMSimulationParameter::EdgeNodePositions> &edgeNodes,
                               std::vector<ProcessNeighbor27> &recvProcessNeighborHost,
                               std::vector<ProcessNeighbor27> &sendProcessNeighborHost);
 
@@ -49,12 +49,12 @@ extern "C" void copyEdgeNodes(std::vector<LBMSimulationParameter::EdgeNodePositi
 
 //! \brief Collect the send nodes for communication in the x direction in a buffer on the gpu
 //! \details Needed to exchange all nodes, used in the communication after collision step
-extern "C" void prepareExchangeCollDataXGPU27AllNodes(Parameter *para, int level, int streamIndex);
+void prepareExchangeCollDataXGPU27AllNodes(Parameter *para, int level, int streamIndex);
 //! \brief Collect the send nodes for communication in the x direction in a buffer on the gpu
 //! \details Only exchange nodes which are part of the interpolation process on refined grids. This function is used in
 //! the exchange which takes place after the interpolation fine to coarse and before the interpolation coarse to fine.
 //! See [master thesis of Anna Wellmann]
-extern "C" void prepareExchangeCollDataXGPU27AfterFtoC(Parameter *para, int level, int streamIndex);
+void prepareExchangeCollDataXGPU27AfterFtoC(Parameter *para, int level, int streamIndex);
 //! \brief Exchange routine in x direction for simulations on multiple gpus
 //! \details Send and receive the nodes from the communication buffers on the gpus.
 //! \param Communicator is needed for the communication between the processes with mpi
@@ -62,7 +62,7 @@ extern "C" void prepareExchangeCollDataXGPU27AfterFtoC(Parameter *para, int leve
 //! \param streamIndex is the index of a CUDA Stream, which is needed for communication hiding
 //! \param sendProcessNeighborDev, recvProcessNeighborDev, sendProcessNeighborHost, recvProcessNeighborHost are pointers
 //! to the send and receive arrays, both on the device and the host
-extern "C" void exchangeCollDataXGPU27(Parameter *para, vf::gpu::Communicator &comm, CudaMemoryManager *cudaMemoryManager,
+void exchangeCollDataXGPU27(Parameter *para, vf::gpu::Communicator &comm, CudaMemoryManager *cudaMemoryManager,
                                        int level, int streamIndex,
                                        std::vector<ProcessNeighbor27> *sendProcessNeighborDev,
                                        std::vector<ProcessNeighbor27> *recvProcessNeighborDev,
@@ -70,85 +70,85 @@ extern "C" void exchangeCollDataXGPU27(Parameter *para, vf::gpu::Communicator &c
                                        std::vector<ProcessNeighbor27> *recvProcessNeighborHost);
 //! \brief Calls exchangeCollDataXGPU27() for exchanging all nodes
 //! \details Used in the communication after collision step
-extern "C" void exchangeCollDataXGPU27AllNodes(Parameter *para, vf::gpu::Communicator &comm,
+void exchangeCollDataXGPU27AllNodes(Parameter *para, vf::gpu::Communicator &comm,
                                                CudaMemoryManager *cudaMemoryManager, int level, int streamIndex);
 //! \brief Calls exchangeCollDataXGPU27() for exchanging the nodes, which are part of the communication between the two
 //! interpolation processes on refined grids 
 //! \details Only exchange nodes which are part of the interpolation process on
 //! refined grids. This function is used in the exchange which takes place after the interpolation fine to coarse and
 //! before the interpolation coarse to fine. See [master thesis of Anna Wellmann]
-extern "C" void exchangeCollDataXGPU27AfterFtoC(Parameter *para, vf::gpu::Communicator &comm,
+void exchangeCollDataXGPU27AfterFtoC(Parameter *para, vf::gpu::Communicator &comm,
                                                 CudaMemoryManager *cudaMemoryManager, int level, int streamIndex);
 //! \brief Distribute the receive nodes (x direction) from the buffer on the gpu
 //! \details Needed to exchange all nodes, used in the communication after collision step
-extern "C" void scatterNodesFromRecvBufferXGPU27AllNodes(Parameter *para, int level, int streamIndex);
+void scatterNodesFromRecvBufferXGPU27AllNodes(Parameter *para, int level, int streamIndex);
 //! \brief Distribute the receive nodes (x direction) from the buffer on the gpu
 //! \details Only exchange nodes which are part of the interpolation process on refined grids. This function is used in
 //! the exchange which takes place after the interpolation fine to coarse and before the interpolation coarse to fine.
 //! See [master thesis of Anna Wellmann]
-extern "C" void scatterNodesFromRecvBufferXGPU27AfterFtoC(Parameter *para, int level, int streamIndex);
+void scatterNodesFromRecvBufferXGPU27AfterFtoC(Parameter *para, int level, int streamIndex);
 
 //////////////////////////////////////////////////////////////////////////
 // y
 
-extern "C" void prepareExchangeCollDataYGPU27AllNodes(Parameter *para, int level, int streamIndex);
-extern "C" void prepareExchangeCollDataYGPU27AfterFtoC(Parameter *para, int level, int streamIndex);
+void prepareExchangeCollDataYGPU27AllNodes(Parameter *para, int level, int streamIndex);
+void prepareExchangeCollDataYGPU27AfterFtoC(Parameter *para, int level, int streamIndex);
 
-extern "C" void exchangeCollDataYGPU27(Parameter *para, vf::gpu::Communicator &comm, CudaMemoryManager *cudaMemoryManager,
+void exchangeCollDataYGPU27(Parameter *para, vf::gpu::Communicator &comm, CudaMemoryManager *cudaMemoryManager,
                                        int level, int streamIndex,
                                        std::vector<ProcessNeighbor27> *sendProcessNeighborDev,
                                        std::vector<ProcessNeighbor27> *recvProcessNeighborDev,
                                        std::vector<ProcessNeighbor27> *sendProcessNeighborHost,
                                        std::vector<ProcessNeighbor27> *recvProcessNeighborHos);
-extern "C" void exchangeCollDataYGPU27AllNodes(Parameter *para, vf::gpu::Communicator &comm,
+void exchangeCollDataYGPU27AllNodes(Parameter *para, vf::gpu::Communicator &comm,
                                                CudaMemoryManager *cudaMemoryManager, int level, int streamIndex);
-extern "C" void exchangeCollDataYGPU27AfterFtoC(Parameter *para, vf::gpu::Communicator &comm,
+void exchangeCollDataYGPU27AfterFtoC(Parameter *para, vf::gpu::Communicator &comm,
                                                 CudaMemoryManager *cudaMemoryManager, int level, int streamIndex);
-extern "C" void scatterNodesFromRecvBufferYGPU27AllNodes(Parameter *para, int level, int streamIndex);
-extern "C" void scatterNodesFromRecvBufferYGPU27AfterFtoC(Parameter *para, int level, int streamIndex);
+void scatterNodesFromRecvBufferYGPU27AllNodes(Parameter *para, int level, int streamIndex);
+void scatterNodesFromRecvBufferYGPU27AfterFtoC(Parameter *para, int level, int streamIndex);
 
 // z
-extern "C" void prepareExchangeCollDataZGPU27AllNodes(Parameter *para, int level, int streamIndex);
-extern "C" void prepareExchangeCollDataZGPU27AfterFtoC(Parameter *para, int level, int streamIndex);
+void prepareExchangeCollDataZGPU27AllNodes(Parameter *para, int level, int streamIndex);
+void prepareExchangeCollDataZGPU27AfterFtoC(Parameter *para, int level, int streamIndex);
 
-extern "C" void exchangeCollDataZGPU27(Parameter *para, vf::gpu::Communicator &comm, CudaMemoryManager *cudaMemoryManager,
+void exchangeCollDataZGPU27(Parameter *para, vf::gpu::Communicator &comm, CudaMemoryManager *cudaMemoryManager,
                                        int level, int streamIndex,
                                        std::vector<ProcessNeighbor27> *sendProcessNeighborDev,
                                        std::vector<ProcessNeighbor27> *recvProcessNeighborDev,
                                        std::vector<ProcessNeighbor27> *sendProcessNeighborHost,
                                        std::vector<ProcessNeighbor27> *recvProcessNeighborHost);
-extern "C" void exchangeCollDataZGPU27AllNodes(Parameter *para, vf::gpu::Communicator &comm,
+void exchangeCollDataZGPU27AllNodes(Parameter *para, vf::gpu::Communicator &comm,
                                                CudaMemoryManager *cudaMemoryManager, int level, int streamIndex);
-extern "C" void exchangeCollDataZGPU27AfterFtoC(Parameter *para, vf::gpu::Communicator &comm,
+void exchangeCollDataZGPU27AfterFtoC(Parameter *para, vf::gpu::Communicator &comm,
                                                 CudaMemoryManager *cudaMemoryManager, int level, int streamIndex);
 
-extern "C" void scatterNodesFromRecvBufferZGPU27AllNodes(Parameter *para, int level, int streamIndex);
-extern "C" void scatterNodesFromRecvBufferZGPU27AfterFtoC(Parameter *para, int level, int streamIndex);
+void scatterNodesFromRecvBufferZGPU27AllNodes(Parameter *para, int level, int streamIndex);
+void scatterNodesFromRecvBufferZGPU27AfterFtoC(Parameter *para, int level, int streamIndex);
 
 //////////////////////////////////////////////////////////////////////////
 // 3D domain decomposition convection diffusion
-extern "C" void exchangePreCollDataADXGPU27(Parameter *para, vf::gpu::Communicator &comm,
+void exchangePreCollDataADXGPU27(Parameter *para, vf::gpu::Communicator &comm,
                                             CudaMemoryManager *cudaMemoryManager, int level);
-extern "C" void exchangePreCollDataADYGPU27(Parameter *para, vf::gpu::Communicator &comm,
+void exchangePreCollDataADYGPU27(Parameter *para, vf::gpu::Communicator &comm,
                                             CudaMemoryManager *cudaMemoryManager, int level);
-extern "C" void exchangePreCollDataADZGPU27(Parameter *para, vf::gpu::Communicator &comm,
+void exchangePreCollDataADZGPU27(Parameter *para, vf::gpu::Communicator &comm,
                                             CudaMemoryManager *cudaMemoryManager, int level);
-extern "C" void exchangePostCollDataADXGPU27(Parameter *para, vf::gpu::Communicator &comm,
+void exchangePostCollDataADXGPU27(Parameter *para, vf::gpu::Communicator &comm,
                                              CudaMemoryManager *cudaMemoryManager, int level);
-extern "C" void exchangePostCollDataADYGPU27(Parameter *para, vf::gpu::Communicator &comm,
+void exchangePostCollDataADYGPU27(Parameter *para, vf::gpu::Communicator &comm,
                                              CudaMemoryManager *cudaMemoryManager, int level);
-extern "C" void exchangePostCollDataADZGPU27(Parameter *para, vf::gpu::Communicator &comm,
+void exchangePostCollDataADZGPU27(Parameter *para, vf::gpu::Communicator &comm,
                                              CudaMemoryManager *cudaMemoryManager, int level);
 //////////////////////////////////////////////////////////////////////////
 // 3D domain decomposition F3 - K18/K20
-extern "C" void exchangeCollDataF3XGPU(Parameter *para, vf::gpu::Communicator &comm, CudaMemoryManager *cudaMemoryManager,
+void exchangeCollDataF3XGPU(Parameter *para, vf::gpu::Communicator &comm, CudaMemoryManager *cudaMemoryManager,
                                        int level);
-extern "C" void exchangeCollDataF3YGPU(Parameter *para, vf::gpu::Communicator &comm, CudaMemoryManager *cudaMemoryManager,
+void exchangeCollDataF3YGPU(Parameter *para, vf::gpu::Communicator &comm, CudaMemoryManager *cudaMemoryManager,
                                        int level);
-extern "C" void exchangeCollDataF3ZGPU(Parameter *para, vf::gpu::Communicator &comm, CudaMemoryManager *cudaMemoryManager,
+void exchangeCollDataF3ZGPU(Parameter *para, vf::gpu::Communicator &comm, CudaMemoryManager *cudaMemoryManager,
                                        int level);
 //////////////////////////////////////////////////////////////////////////
-extern "C" void barrierGPU(vf::gpu::Communicator &comm);
+void barrierGPU(vf::gpu::Communicator &comm);
 //////////////////////////////////////////////////////////////////////////
 
 #endif
diff --git a/src/gpu/VirtualFluids_GPU/FindInterface/FindInterface.h b/src/gpu/VirtualFluids_GPU/FindInterface/FindInterface.h
index 056c25d39489abc9ae9491f771f05d345a57d02a..3be49570b33d99f9517796b33934dee1e2f31221 100644
--- a/src/gpu/VirtualFluids_GPU/FindInterface/FindInterface.h
+++ b/src/gpu/VirtualFluids_GPU/FindInterface/FindInterface.h
@@ -5,7 +5,7 @@
 #include "lbm/constants/D3Q27.h"
 
 
-extern "C" void interpolation(InterpolationCellCF &intCF, InterpolationCellFC &intFC, 
+void interpolation(InterpolationCellCF &intCF, InterpolationCellFC &intFC, 
                                unsigned int LxCoarse, unsigned int LyCoarse, unsigned int LzCoarse, 
                                unsigned int LxFine, unsigned int LyFine, unsigned int LzFine, 
                                unsigned int dNx, unsigned int dNy, unsigned int dNz, 
diff --git a/src/gpu/VirtualFluids_GPU/FindQ/DefineBCs.h b/src/gpu/VirtualFluids_GPU/FindQ/DefineBCs.h
index 92934f85198f9141c97e78a433049d01f3c4b075..045d77f25cab213dfd130fe068f0724b6955bc4d 100644
--- a/src/gpu/VirtualFluids_GPU/FindQ/DefineBCs.h
+++ b/src/gpu/VirtualFluids_GPU/FindQ/DefineBCs.h
@@ -6,10 +6,10 @@
 #include "Parameter/Parameter.h"
 #include "GPU/CudaMemoryManager.h"
 
-extern "C" void findQ27(Parameter* para, CudaMemoryManager* cudaMemoryManager);
+void findQ27(Parameter* para, CudaMemoryManager* cudaMemoryManager);
 
-extern "C" void findBC27(Parameter* para, CudaMemoryManager* cudaMemoryManager);
+void findBC27(Parameter* para, CudaMemoryManager* cudaMemoryManager);
 
-extern "C" void findPressQShip(Parameter* para, CudaMemoryManager* cudaMemoryManager);
+void findPressQShip(Parameter* para, CudaMemoryManager* cudaMemoryManager);
 
 #endif
diff --git a/src/gpu/VirtualFluids_GPU/FindQ/FindQ.h b/src/gpu/VirtualFluids_GPU/FindQ/FindQ.h
index 25575bb38b5d57c40cc4c9da973b12fa4a30bd2e..551205bd54d9685e9aa6ab8be47ac9e274546f40 100644
--- a/src/gpu/VirtualFluids_GPU/FindQ/FindQ.h
+++ b/src/gpu/VirtualFluids_GPU/FindQ/FindQ.h
@@ -5,42 +5,42 @@
 #include "lbm/constants/D3Q27.h"
 #include "Parameter/Parameter.h"
 
-extern "C" void findQ(Parameter* para, int lev);
+void findQ(Parameter* para, int lev);
 
-extern "C" void findKforQ(Parameter* para, int lev);
+void findKforQ(Parameter* para, int lev);
 
-extern "C" void findQ_MG(int nx, int ny, unsigned int nnx, unsigned int nny, unsigned int nnz, int* geo_mat, unsigned int* kk, unsigned int sizeQ, real* QQ, QforBoundaryConditions &QIN);
+void findQ_MG(int nx, int ny, unsigned int nnx, unsigned int nny, unsigned int nnz, int* geo_mat, unsigned int* kk, unsigned int sizeQ, real* QQ, QforBoundaryConditions &QIN);
 
-extern "C" void findKforQ_MG(int nx, int ny, unsigned int nnx, unsigned int nny, unsigned int nnz, int* geo_mat, QforBoundaryConditions &QIN);
+void findKforQ_MG(int nx, int ny, unsigned int nnx, unsigned int nny, unsigned int nnz, int* geo_mat, QforBoundaryConditions &QIN);
 
-extern "C" void findQInflow(Parameter* para);
+void findQInflow(Parameter* para);
 
-extern "C" void findKforQInflow(Parameter* para);
+void findKforQInflow(Parameter* para);
 
-extern "C" void findQPressInflow(Parameter* para);
+void findQPressInflow(Parameter* para);
 
-extern "C" void findKforQPressInflow(Parameter* para);
+void findKforQPressInflow(Parameter* para);
 
-extern "C" void findQOutflow(Parameter* para);
+void findQOutflow(Parameter* para);
 
-extern "C" void findKforQOutflow(Parameter* para);
+void findKforQOutflow(Parameter* para);
 
 // TODO: https://git.rz.tu-bs.de/irmb/VirtualFluids_dev/-/issues/29
-//extern "C" void findQSchlaff( int nx, int ny, unsigned int nnx, unsigned int nny, unsigned int nnz, int* geo_mat, unsigned int* kk,
+//void findQSchlaff( int nx, int ny, unsigned int nnx, unsigned int nny, unsigned int nnz, int* geo_mat, unsigned int* kk,
 //                              unsigned int sizeQN, real* vxN, real* vyN, real* vzN, real*deltaVN, real* QQN, QforBoundaryConditions &QNin,
 //                              unsigned int sizeQS, real* vxS, real* vyS, real* vzS, real*deltaVS, real* QQS, QforBoundaryConditions &QSin,
 //                              unsigned int sizeQE, real* vxE, real* vyE, real* vzE, real*deltaVE, real* QQE, QforBoundaryConditions &QEin,
 //                              unsigned int sizeQW, real* vxW, real* vyW, real* vzW, real*deltaVW, real* QQW, QforBoundaryConditions &QWin);
 //
-//extern "C" void findKforQSchlaff(int nx, int ny, unsigned int nnx, unsigned int nny, unsigned int nnz, int* geo_mat, QforBoundaryConditions &QN, QforBoundaryConditions &QS, QforBoundaryConditions &QE, QforBoundaryConditions &QW);
+//void findKforQSchlaff(int nx, int ny, unsigned int nnx, unsigned int nny, unsigned int nnz, int* geo_mat, QforBoundaryConditions &QN, QforBoundaryConditions &QS, QforBoundaryConditions &QE, QforBoundaryConditions &QW);
 
 
-extern "C" void findKforQPressX1(Parameter* para, int lev);
+void findKforQPressX1(Parameter* para, int lev);
 
-extern "C" void findQPressX1(Parameter* para, int lev);
+void findQPressX1(Parameter* para, int lev);
 
-extern "C" void findKforQPressX0(Parameter* para, int lev);
+void findKforQPressX0(Parameter* para, int lev);
 
-extern "C" void findQPressX0(Parameter* para, int lev);
+void findQPressX0(Parameter* para, int lev);
 
 #endif
diff --git a/src/gpu/VirtualFluids_GPU/GPU/AdvectionDiffusion27chim.cu b/src/gpu/VirtualFluids_GPU/GPU/AdvectionDiffusion27chim.cu
index 9dfa96e10c750b3494bc0733421cd91b9219dfb5..8e3e5e28cdaff965500b79fd813a4619d181f4ff 100644
--- a/src/gpu/VirtualFluids_GPU/GPU/AdvectionDiffusion27chim.cu
+++ b/src/gpu/VirtualFluids_GPU/GPU/AdvectionDiffusion27chim.cu
@@ -66,7 +66,7 @@ inline __device__ void backwardChimera(real &mfa, real &mfb, real &mfc, real vv,
 
 
 ////////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void Factorized_Central_Moments_Advection_Diffusion_Device_Kernel(
+__global__ void Factorized_Central_Moments_Advection_Diffusion_Device_Kernel(
 	real omegaDiffusivity,
 	uint* typeOfGridNode,
 	uint* neighborX,
diff --git a/src/gpu/VirtualFluids_GPU/GPU/AdvectionDiffusionBCs27.cu b/src/gpu/VirtualFluids_GPU/GPU/AdvectionDiffusionBCs27.cu
index cc2b3c7c9311def9f3d94f264e68e9ff159d513f..dc9e8c7b6c50cfc954d81ef517eda25f32610e3c 100644
--- a/src/gpu/VirtualFluids_GPU/GPU/AdvectionDiffusionBCs27.cu
+++ b/src/gpu/VirtualFluids_GPU/GPU/AdvectionDiffusionBCs27.cu
@@ -8,7 +8,7 @@ using namespace vf::lbm::constant;
 using namespace vf::lbm::dir;
 
 //////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void QADPress7(  real* DD, 
+__global__ void QADPress7(  real* DD, 
                                        real* DD7, 
                                        real* temp,
                                        real* velo,
@@ -449,7 +449,7 @@ extern "C" __global__ void QADPress7(  real* DD,
 
 
 //////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void QADPress27( real* DD, 
+__global__ void QADPress27( real* DD, 
                                        real* DD27, 
                                        real* temp,
                                        real* velo,
@@ -980,7 +980,7 @@ extern "C" __global__ void QADPress27( real* DD,
 
 
 //////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void QADPressNEQNeighbor27(
+__global__ void QADPressNEQNeighbor27(
 													real* DD,
 													real* DD27,
 													int* k_Q,
@@ -1415,7 +1415,7 @@ extern "C" __global__ void QADPressNEQNeighbor27(
 
 
 //////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void QADVel7( real* DD, 
+__global__ void QADVel7( real* DD, 
                                     real* DD7, 
                                     real* temp,
                                     real* velo,
@@ -1832,14 +1832,14 @@ extern "C" __global__ void QADVel7( real* DD,
 
 
 //////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void QADVel27(real* DD, 
+__global__ void QADVel27(real* DD, 
                                     real* DD27, 
                                     real* temp,
                                     real* velo,
                                     real diffusivity,
                                     int* k_Q, 
                                     real* QQ,
-                                    int numberOfBCnodes, 
+                                    unsigned int numberOfBCnodes, 
                                     real om1, 
                                     unsigned int* neighborX,
                                     unsigned int* neighborY,
@@ -2420,7 +2420,7 @@ extern "C" __global__ void QADVel27(real* DD,
 
 
 //////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void QAD7( real* DD, 
+__global__ void QAD7( real* DD, 
                                  real* DD7, 
                                  real* temp,
                                  real diffusivity,
@@ -2852,7 +2852,7 @@ extern "C" __global__ void QAD7( real* DD,
 
 
 //////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void QADDirichlet27(
+__global__ void QADDirichlet27(
 											 real* DD, 
 											 real* DD27, 
 											 real* temp,
@@ -3378,7 +3378,7 @@ extern "C" __global__ void QADDirichlet27(
 
 
 //////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void QADBB27( real* DD, 
+__global__ void QADBB27( real* DD, 
                                    real* DD27, 
                                    real* temp,
                                    real diffusivity,
@@ -3893,7 +3893,7 @@ extern "C" __global__ void QADBB27( real* DD,
 
 
 //////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void QNoSlipADincomp7(
+__global__ void QNoSlipADincomp7(
 											 real* DD, 
 											 real* DD7, 
 											 real* temp,
@@ -4317,7 +4317,7 @@ extern "C" __global__ void QNoSlipADincomp7(
 
 
 //////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void QNoSlipADincomp27(
+__global__ void QNoSlipADincomp27(
 											 real* DD, 
 											 real* DD27, 
 											 real* temp,
@@ -4798,7 +4798,7 @@ extern "C" __global__ void QNoSlipADincomp27(
 
 
 //////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void QADVeloIncomp7(
+__global__ void QADVeloIncomp7(
 											real* DD, 
 											real* DD7, 
 											real* temp,
@@ -5276,7 +5276,7 @@ extern "C" __global__ void QADVeloIncomp7(
 
 
 //////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void QADVeloIncomp27(
+__global__ void QADVeloIncomp27(
 											real* DD, 
 											real* DD27, 
 											real* temp,
@@ -5789,9 +5789,7 @@ extern "C" __global__ void QADVeloIncomp27(
 
 
 //////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void QADPressIncomp7(int inx,
-										   int iny,
-										   real* DD, 
+__global__ void QADPressIncomp7( real* DD, 
 										   real* DD7, 
 										   real* temp,
 										   real* velo,
@@ -6229,7 +6227,7 @@ extern "C" __global__ void QADPressIncomp7(int inx,
 
 
 //////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void QADPressIncomp27(
+__global__ void QADPressIncomp27(
 											   real* DD,
 											   real* DD27,
 											   real* temp,
@@ -6728,7 +6726,7 @@ inline __device__ real calcDistributionBC_AD(real q, real weight, real v, real v
 
 // has to be excecuted before Fluid BCs
 //////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void AD_SlipVelDeviceComp(
+__global__ void AD_SlipVelDeviceComp(
     real *normalX,
     real *normalY,
     real *normalZ,
diff --git a/src/gpu/VirtualFluids_GPU/GPU/CP27.cu b/src/gpu/VirtualFluids_GPU/GPU/CP27.cu
index 9a02f5544b9f7ddb2228e87ca9a35cbd7c332a09..bccc1e144682989adbaa50573f0959d0a24e0ef3 100644
--- a/src/gpu/VirtualFluids_GPU/GPU/CP27.cu
+++ b/src/gpu/VirtualFluids_GPU/GPU/CP27.cu
@@ -7,7 +7,7 @@ using namespace vf::lbm::constant;
 using namespace vf::lbm::dir;
 
 ////////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void CalcCP27(real* DD, 
+__global__ void CalcCP27(real* DD, 
 									int* cpIndex, 
 									int nonCp, 
 									double *cpPress,
diff --git a/src/gpu/VirtualFluids_GPU/GPU/Calc2ndMoments27.cu b/src/gpu/VirtualFluids_GPU/GPU/Calc2ndMoments27.cu
index e3755ff2796ff180f0a3ae139f1c450cfcc4296a..3a14714f59745b0355da73dc34a01891e7c3394b 100644
--- a/src/gpu/VirtualFluids_GPU/GPU/Calc2ndMoments27.cu
+++ b/src/gpu/VirtualFluids_GPU/GPU/Calc2ndMoments27.cu
@@ -7,7 +7,7 @@ using namespace vf::lbm::constant;
 using namespace vf::lbm::dir;
 
 ////////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void LBCalc2ndMomentsIncompSP27(  real* kxyFromfcNEQ,
+__global__ void LBCalc2ndMomentsIncompSP27(  real* kxyFromfcNEQ,
 														real* kyzFromfcNEQ,
 														real* kxzFromfcNEQ,
 														real* kxxMyyFromfcNEQ,
@@ -206,7 +206,7 @@ extern "C" __global__ void LBCalc2ndMomentsIncompSP27(  real* kxyFromfcNEQ,
 
 
 ////////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void LBCalc2ndMomentsCompSP27(real* kxyFromfcNEQ,
+__global__ void LBCalc2ndMomentsCompSP27(real* kxyFromfcNEQ,
 													real* kyzFromfcNEQ,
 													real* kxzFromfcNEQ,
 													real* kxxMyyFromfcNEQ,
@@ -411,7 +411,7 @@ extern "C" __global__ void LBCalc2ndMomentsCompSP27(real* kxyFromfcNEQ,
 
 
 ////////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void LBCalc3rdMomentsIncompSP27(  real* CUMbbb,
+__global__ void LBCalc3rdMomentsIncompSP27(  real* CUMbbb,
 														real* CUMabc,
 														real* CUMbac,
 														real* CUMbca,
@@ -845,7 +845,7 @@ extern "C" __global__ void LBCalc3rdMomentsIncompSP27(  real* CUMbbb,
 
 
 ////////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void LBCalc3rdMomentsCompSP27(real* CUMbbb,
+__global__ void LBCalc3rdMomentsCompSP27(real* CUMbbb,
 													real* CUMabc,
 													real* CUMbac,
 													real* CUMbca,
@@ -1283,7 +1283,7 @@ extern "C" __global__ void LBCalc3rdMomentsCompSP27(real* CUMbbb,
 
 
 ////////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void LBCalcHigherMomentsIncompSP27(   real* CUMcbb,
+__global__ void LBCalcHigherMomentsIncompSP27(   real* CUMcbb,
 															real* CUMbcb,
 															real* CUMbbc,
 															real* CUMcca,
@@ -1737,7 +1737,7 @@ extern "C" __global__ void LBCalcHigherMomentsIncompSP27(   real* CUMcbb,
 
 
 ////////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void LBCalcHigherMomentsCompSP27( real* CUMcbb,
+__global__ void LBCalcHigherMomentsCompSP27( real* CUMcbb,
 														real* CUMbcb,
 														real* CUMbbc,
 														real* CUMcca,
diff --git a/src/gpu/VirtualFluids_GPU/GPU/CalcConc27.cu b/src/gpu/VirtualFluids_GPU/GPU/CalcConc27.cu
index c71f58bdfd69397338959162ee167527ec9d4380..65fa2f5392b5d82236d1c1ff703e9cf17bba6e77 100644
--- a/src/gpu/VirtualFluids_GPU/GPU/CalcConc27.cu
+++ b/src/gpu/VirtualFluids_GPU/GPU/CalcConc27.cu
@@ -39,7 +39,7 @@ using namespace vf::lbm::constant;
 using namespace vf::lbm::dir;
 
 ////////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void CalcConc27(
+__global__ void CalcConc27(
 	real* concentration,
 	uint* typeOfGridNode,
 	uint* neighborX,
@@ -224,7 +224,7 @@ extern "C" __global__ void CalcConc27(
 
 
 ////////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void CalcConc7( real* Conc,
+__global__ void CalcConc7( real* Conc,
                                           unsigned int* geoD,
                                           unsigned int* neighborX,
                                           unsigned int* neighborY,
@@ -315,7 +315,7 @@ extern "C" __global__ void CalcConc7( real* Conc,
 
 // DEPRECATED (2022)
 //////////////////////////////////////////////////////////////////////////////////
-// extern "C" __global__ void LBCalcMacThS27(real* Conc,
+// __global__ void LBCalcMacThS27(real* Conc,
 //                                           unsigned int* geoD,
 //                                           unsigned int* neighborX,
 //                                           unsigned int* neighborY,
@@ -469,7 +469,7 @@ extern "C" __global__ void CalcConc7( real* Conc,
 
 
 ////////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void GetPlaneConc7(real* Conc,
+__global__ void GetPlaneConc7(real* Conc,
 								            int* kPC,
 								            unsigned int numberOfPointskPC,
 											unsigned int* geoD,
@@ -574,7 +574,7 @@ extern "C" __global__ void GetPlaneConc7(real* Conc,
 
 
 ////////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void GetPlaneConc27(real* Conc,
+__global__ void GetPlaneConc27(real* Conc,
 								             int* kPC,
 								             unsigned int numberOfPointskPC,
 											 unsigned int* geoD,
diff --git a/src/gpu/VirtualFluids_GPU/GPU/CalcMac27.cu b/src/gpu/VirtualFluids_GPU/GPU/CalcMac27.cu
index ba114ef1d80eeac869086179aab16428bffa74b4..b336a879c06bdc20d44ab62555fd8020753ce42d 100644
--- a/src/gpu/VirtualFluids_GPU/GPU/CalcMac27.cu
+++ b/src/gpu/VirtualFluids_GPU/GPU/CalcMac27.cu
@@ -19,7 +19,7 @@ using namespace vf::lbm::dir;
 
 
 ////////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void LBCalcMac27( real* vxD,
+__global__ void LBCalcMac27( real* vxD,
                                         real* vyD,
                                         real* vzD,
                                         real* rhoD,
@@ -70,7 +70,7 @@ extern "C" __global__ void LBCalcMac27( real* vxD,
 
 
 ////////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void LBCalcMacSP27( real* vxD,
+__global__ void LBCalcMacSP27( real* vxD,
                                           real* vyD,
                                           real* vzD,
                                           real* rhoD,
@@ -264,7 +264,7 @@ extern "C" __global__ void LBCalcMacSP27( real* vxD,
 
 
 ////////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void LBCalcMacCompSP27(real *vxD, real *vyD, real *vzD, real *rhoD, real *pressD,
+__global__ void LBCalcMacCompSP27(real *vxD, real *vyD, real *vzD, real *rhoD, real *pressD,
                                              unsigned int *geoD, unsigned int *neighborX, unsigned int *neighborY,
                                              unsigned int *neighborZ, unsigned int size_Mat, real *distributions,
                                              bool isEvenTimestep)
@@ -330,7 +330,7 @@ extern "C" __global__ void LBCalcMacCompSP27(real *vxD, real *vyD, real *vzD, re
 
 
 ////////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void LBCalcMedSP27( real* vxD,
+__global__ void LBCalcMedSP27( real* vxD,
                                           real* vyD,
                                           real* vzD,
                                           real* rhoD,
@@ -554,7 +554,7 @@ extern "C" __global__ void LBCalcMedSP27( real* vxD,
 
 
 ////////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void LBCalcMedCompSP27( real* vxD,
+__global__ void LBCalcMedCompSP27( real* vxD,
 											  real* vyD,
 											  real* vzD,
 											  real* rhoD,
@@ -831,7 +831,7 @@ extern "C" __global__ void LBCalcMedCompSP27( real* vxD,
 
 
 ////////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void LBCalcMedCompAD27(
+__global__ void LBCalcMedCompAD27(
 	real* vxD,
 	real* vyD,
 	real* vzD,
@@ -1159,7 +1159,7 @@ extern "C" __global__ void LBCalcMedCompAD27(
 
 
 ////////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void LBCalcMacMedSP27( real* vxD,
+__global__ void LBCalcMacMedSP27( real* vxD,
                                              real* vyD,
                                              real* vzD,
                                              real* rhoD,
@@ -1231,7 +1231,7 @@ extern "C" __global__ void LBCalcMacMedSP27( real* vxD,
 
 
 ////////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void LBResetMedianValuesSP27(
+__global__ void LBResetMedianValuesSP27(
 	real* vxD,
 	real* vyD,
 	real* vzD,
@@ -1284,7 +1284,7 @@ extern "C" __global__ void LBResetMedianValuesSP27(
 
 
 ////////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void LBResetMedianValuesAD27(
+__global__ void LBResetMedianValuesAD27(
 	real* vxD,
 	real* vyD,
 	real* vzD,
@@ -1339,7 +1339,7 @@ extern "C" __global__ void LBResetMedianValuesAD27(
 
 
 ////////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void LBCalcMeasurePoints( real* vxMP,
+__global__ void LBCalcMeasurePoints( real* vxMP,
 												real* vyMP,
 												real* vzMP,
 												real* rhoMP,
@@ -1550,7 +1550,7 @@ extern "C" __global__ void LBCalcMeasurePoints( real* vxMP,
 
 
 ////////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void LBSetOutputWallVelocitySP27( real* vxD,
+__global__ void LBSetOutputWallVelocitySP27( real* vxD,
 														real* vyD,
 														real* vzD,
 														real* vxWall,
diff --git a/src/gpu/VirtualFluids_GPU/GPU/Cascade27.cu b/src/gpu/VirtualFluids_GPU/GPU/Cascade27.cu
index 6f4ac6ca5634f280b877cfd86fb7c01ce4870857..db94bb4e6109e96501aadfbff91f7720a639ba3e 100644
--- a/src/gpu/VirtualFluids_GPU/GPU/Cascade27.cu
+++ b/src/gpu/VirtualFluids_GPU/GPU/Cascade27.cu
@@ -9,7 +9,7 @@ using namespace vf::lbm::dir;
 #include "math.h"
 
 ////////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void LB_Kernel_Cascade_SP_27(     real omega,
+__global__ void LB_Kernel_Cascade_SP_27(     real omega,
 														unsigned int* bcMatD,
 														unsigned int* neighborX,
 														unsigned int* neighborY,
@@ -836,7 +836,7 @@ extern "C" __global__ void LB_Kernel_Cascade_SP_27(     real omega,
 
 
 ////////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void LB_Kernel_Casc_Comp_SP_27(      real omega,
+__global__ void LB_Kernel_Casc_Comp_SP_27(      real omega,
 														   unsigned int* bcMatD,
 														   unsigned int* neighborX,
 														   unsigned int* neighborY,
@@ -1683,7 +1683,7 @@ extern "C" __global__ void LB_Kernel_Casc_Comp_SP_27(      real omega,
 
 
 ////////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void LB_Kernel_Casc_SP_MS_OHM_27(  real omega,
+__global__ void LB_Kernel_Casc_SP_MS_OHM_27(  real omega,
                                                          unsigned int* bcMatD,
                                                          unsigned int* neighborX,
                                                          unsigned int* neighborY,
@@ -2315,7 +2315,7 @@ extern "C" __global__ void LB_Kernel_Casc_SP_MS_OHM_27(  real omega,
 
 
 ////////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void LB_Kernel_Casc_SP_MS_27(   real omega,
+__global__ void LB_Kernel_Casc_SP_MS_27(   real omega,
                                                       unsigned int* bcMatD,
                                                       unsigned int* neighborX,
                                                       unsigned int* neighborY,
@@ -2840,7 +2840,7 @@ extern "C" __global__ void LB_Kernel_Casc_SP_MS_27(   real omega,
 
 
 ////////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void LB_Kernel_Casc_SP_MS_Diff_27(real omega,
+__global__ void LB_Kernel_Casc_SP_MS_Diff_27(real omega,
                                                         unsigned int* bcMatD,
                                                         unsigned int* neighborX,
                                                         unsigned int* neighborY,
@@ -3362,7 +3362,7 @@ extern "C" __global__ void LB_Kernel_Casc_SP_MS_Diff_27(real omega,
 
 
 ////////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void LB_Kernel_Casc_SP_27(  real omega,
+__global__ void LB_Kernel_Casc_SP_27(  real omega,
                                                   unsigned int* bcMatD,
                                                   unsigned int* neighborX,
                                                   unsigned int* neighborY,
@@ -4054,7 +4054,7 @@ extern "C" __global__ void LB_Kernel_Casc_SP_27(  real omega,
 
 
 ////////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void LB_Kernel_Casc27(real omega,
+__global__ void LB_Kernel_Casc27(real omega,
                                             unsigned int* bcMatD,
                                             unsigned int* neighborX,
                                             unsigned int* neighborY,
diff --git a/src/gpu/VirtualFluids_GPU/GPU/Cumulant27.cu b/src/gpu/VirtualFluids_GPU/GPU/Cumulant27.cu
index 7a12d97d8dc5091a2479d84930b610a023aa6b17..3f7f6e661e5f365b509b48332cddfbd0012e0079 100644
--- a/src/gpu/VirtualFluids_GPU/GPU/Cumulant27.cu
+++ b/src/gpu/VirtualFluids_GPU/GPU/Cumulant27.cu
@@ -15,7 +15,7 @@ using namespace vf::lbm::constant;
 using namespace vf::lbm::dir;
 
 ////////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void LB_Kernel_Kum_AA2016_Comp_Bulk_SP_27(real omega,
+__global__ void LB_Kernel_Kum_AA2016_Comp_Bulk_SP_27(real omega,
 																unsigned int* bcMatD,
 																unsigned int* neighborX,
 																unsigned int* neighborY,
@@ -966,7 +966,7 @@ extern "C" __global__ void LB_Kernel_Kum_AA2016_Comp_Bulk_SP_27(real omega,
 
 
 ////////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void LB_Kernel_Kum_IsoTest_SP_27( real omega,
+__global__ void LB_Kernel_Kum_IsoTest_SP_27( real omega,
 														unsigned int* bcMatD,
 														unsigned int* neighborX,
 														unsigned int* neighborY,
@@ -2005,7 +2005,7 @@ extern "C" __global__ void LB_Kernel_Kum_IsoTest_SP_27( real omega,
 
 
 ////////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void LB_Kernel_Kum_1h_SP_27(  real omega,
+__global__ void LB_Kernel_Kum_1h_SP_27(  real omega,
 													real deltaPhi,
 													real angularVelocity,
 													unsigned int* bcMatD,
@@ -3232,7 +3232,7 @@ extern "C" __global__ void LB_Kernel_Kum_1h_SP_27(  real omega,
 
 
 ////////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void LB_Kernel_Kum_New_SP_27(     real omega,
+__global__ void LB_Kernel_Kum_New_SP_27(     real omega,
 														unsigned int* bcMatD,
 														unsigned int* neighborX,
 														unsigned int* neighborY,
@@ -4504,7 +4504,7 @@ extern "C" __global__ void LB_Kernel_Kum_New_SP_27(     real omega,
 
 
 ////////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void LB_Kernel_Kum_Comp_SP_27(    real omega,
+__global__ void LB_Kernel_Kum_Comp_SP_27(    real omega,
 														unsigned int* bcMatD,
 														unsigned int* neighborX,
 														unsigned int* neighborY,
@@ -5444,7 +5444,7 @@ extern "C" __global__ void LB_Kernel_Kum_Comp_SP_27(    real omega,
 
 
 ////////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void LB_Kernel_Kum_New_Comp_SRT_SP_27(
+__global__ void LB_Kernel_Kum_New_Comp_SRT_SP_27(
 	real omega,
 	unsigned int* bcMatD,
 	unsigned int* neighborX,
diff --git a/src/gpu/VirtualFluids_GPU/GPU/Cumulant27chim.cu b/src/gpu/VirtualFluids_GPU/GPU/Cumulant27chim.cu
index 19683722f50f402fdc6dfdf5c7774d41b178874c..b4430d08ebc9888b1a8a22a81680e657e65c2f71 100644
--- a/src/gpu/VirtualFluids_GPU/GPU/Cumulant27chim.cu
+++ b/src/gpu/VirtualFluids_GPU/GPU/Cumulant27chim.cu
@@ -44,7 +44,7 @@ using namespace vf::lbm::dir;
 
 
 ////////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void Cumulant_One_preconditioned_errorDiffusion_chim_Comp_SP_27(
+__global__ void Cumulant_One_preconditioned_errorDiffusion_chim_Comp_SP_27(
     real omega,
     unsigned int* bcMatD,
     unsigned int* neighborX,
@@ -959,7 +959,7 @@ extern "C" __global__ void Cumulant_One_preconditioned_errorDiffusion_chim_Comp_
 
 
 ////////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void Cumulant_One_preconditioned_chim_Comp_SP_27(
+__global__ void Cumulant_One_preconditioned_chim_Comp_SP_27(
     real omega,
     unsigned int* bcMatD,
     unsigned int* neighborX,
@@ -1755,7 +1755,7 @@ extern "C" __global__ void Cumulant_One_preconditioned_chim_Comp_SP_27(
 
 
 ////////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void Cumulant_One_chim_Comp_SP_27(
+__global__ void Cumulant_One_chim_Comp_SP_27(
     real omega,
     unsigned int* bcMatD,
     unsigned int* neighborX,
diff --git a/src/gpu/VirtualFluids_GPU/GPU/Cumulant_F3_27.cu b/src/gpu/VirtualFluids_GPU/GPU/Cumulant_F3_27.cu
index e6b696768bfa604f01bd92d34dde95100b8a29f6..51a68ac62384c3e9c2f6e63af2624f86546583bc 100644
--- a/src/gpu/VirtualFluids_GPU/GPU/Cumulant_F3_27.cu
+++ b/src/gpu/VirtualFluids_GPU/GPU/Cumulant_F3_27.cu
@@ -16,7 +16,7 @@ using namespace vf::lbm::dir;
 #include "math.h"
 
 /////////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void LB_PostProcessor_F3_2018_Fehlberg(real omega,
+__global__ void LB_PostProcessor_F3_2018_Fehlberg(real omega,
 															 unsigned int* bcMatD,
 															 unsigned int* neighborX,
 															 unsigned int* neighborY,
@@ -992,7 +992,7 @@ extern "C" __global__ void LB_PostProcessor_F3_2018_Fehlberg(real omega,
 
 
 ///////////////////////////////////////////////////////////////////////////////////
-//extern "C" __global__ void LB_Kernel_Cumulant_D3Q27F3_2018(	real omega,
+//__global__ void LB_Kernel_Cumulant_D3Q27F3_2018(	real omega,
 //															unsigned int* bcMatD,
 //															unsigned int* neighborX,
 //															unsigned int* neighborY,
@@ -1972,7 +1972,7 @@ extern "C" __global__ void LB_PostProcessor_F3_2018_Fehlberg(real omega,
 
 
 /////////////////////////////////////////////////////////////////////////////////
-//extern "C" __global__ void LB_Kernel_Cumulant_D3Q27F3(	real omega,
+//__global__ void LB_Kernel_Cumulant_D3Q27F3(	real omega,
 //														unsigned int* bcMatD,
 //														unsigned int* neighborX,
 //														unsigned int* neighborY,
diff --git a/src/gpu/VirtualFluids_GPU/GPU/DragLift27.cu b/src/gpu/VirtualFluids_GPU/GPU/DragLift27.cu
index 133c2aef257d30c9b4bb064ff4bff8e3d0593c28..82e05c6125ee0c5ac3e006e8888a5fa5a37db1b9 100644
--- a/src/gpu/VirtualFluids_GPU/GPU/DragLift27.cu
+++ b/src/gpu/VirtualFluids_GPU/GPU/DragLift27.cu
@@ -7,7 +7,7 @@ using namespace vf::lbm::constant;
 using namespace vf::lbm::dir;
 
 ////////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void DragLiftPost27(  real* DD, 
+__global__ void DragLiftPost27(  real* DD, 
 											int* k_Q, 
 											real* QQ,
 											int numberOfBCnodes, 
@@ -272,7 +272,7 @@ extern "C" __global__ void DragLiftPost27(  real* DD,
 
 
 ////////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void DragLiftPre27(   real* DD, 
+__global__ void DragLiftPre27(   real* DD, 
 											int* k_Q, 
 											real* QQ,
 											int numberOfBCnodes, 
diff --git a/src/gpu/VirtualFluids_GPU/GPU/ExchangeData27.cu b/src/gpu/VirtualFluids_GPU/GPU/ExchangeData27.cu
index 6b10ede662132ac18f5a3474d2a6d654bf6c835c..3e6d4e9bb1231160ec76faa2d90e7a49c2b71dee 100644
--- a/src/gpu/VirtualFluids_GPU/GPU/ExchangeData27.cu
+++ b/src/gpu/VirtualFluids_GPU/GPU/ExchangeData27.cu
@@ -7,7 +7,7 @@ using namespace vf::lbm::constant;
 using namespace vf::lbm::dir;
 
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void getSendFsPost27(real* DD,
+__global__ void getSendFsPost27(real* DD,
 										   real* bufferFs,
 										   int* sendIndex,
                                            int buffmax,
@@ -243,7 +243,7 @@ extern "C" __global__ void getSendFsPost27(real* DD,
 
 
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void setRecvFsPost27(real* DD,
+__global__ void setRecvFsPost27(real* DD,
 										   real* bufferFs,
 										   int* recvIndex,
                                            int buffmax,
@@ -478,7 +478,7 @@ extern "C" __global__ void setRecvFsPost27(real* DD,
 
 
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void getSendFsPre27(real* DD,
+__global__ void getSendFsPre27(real* DD,
 										  real* bufferFs,
 										  int* sendIndex,
                                           int buffmax,
@@ -687,7 +687,7 @@ extern "C" __global__ void getSendFsPre27(real* DD,
 
 
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void setRecvFsPre27(real* DD,
+__global__ void setRecvFsPre27(real* DD,
 										  real* bufferFs,
 										  int* recvIndex,
                                           int buffmax,
@@ -895,7 +895,7 @@ extern "C" __global__ void setRecvFsPre27(real* DD,
 
 
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void getSendGsF3(
+__global__ void getSendGsF3(
 	real* G6,
 	real* bufferGs,
 	int* sendIndex,
@@ -998,7 +998,7 @@ extern "C" __global__ void getSendGsF3(
 
 
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void setRecvGsF3(
+__global__ void setRecvGsF3(
 	real* G6,
 	real* bufferGs,
 	int* recvIndex,
diff --git a/src/gpu/VirtualFluids_GPU/GPU/GPU_Interface.h b/src/gpu/VirtualFluids_GPU/GPU/GPU_Interface.h
index 19b6081b3df73b60393bdc19e8dc3d05e1534391..f77967b46352cc95d95d4e3fa6db206e4308da73 100644
--- a/src/gpu/VirtualFluids_GPU/GPU/GPU_Interface.h
+++ b/src/gpu/VirtualFluids_GPU/GPU/GPU_Interface.h
@@ -21,7 +21,7 @@ class Parameter;
 //////////////////////////////////////////////////////////////////////////
 //Kernel
 //////////////////////////////////////////////////////////////////////////
-extern "C" void KernelCas27(unsigned int grid_nx, 
+void KernelCas27(unsigned int grid_nx, 
                             unsigned int grid_ny, 
                             unsigned int grid_nz, 
                             real s9,
@@ -33,7 +33,7 @@ extern "C" void KernelCas27(unsigned int grid_nx,
                             int size_Mat,
                             bool EvenOrOdd);
 
-extern "C" void KernelCasSP27(unsigned int numberOfThreads, 
+void KernelCasSP27(unsigned int numberOfThreads, 
                               real s9,
                               unsigned int* bcMatD,
                               unsigned int* neighborX,
@@ -43,7 +43,7 @@ extern "C" void KernelCasSP27(unsigned int numberOfThreads,
                               int size_Mat,
                               bool EvenOrOdd);
 
-extern "C" void KernelCasSPMS27(unsigned int numberOfThreads, 
+void KernelCasSPMS27(unsigned int numberOfThreads, 
                                 real s9,
                                 unsigned int* bcMatD,
                                 unsigned int* neighborX,
@@ -53,7 +53,7 @@ extern "C" void KernelCasSPMS27(unsigned int numberOfThreads,
                                 int size_Mat,
                                 bool EvenOrOdd);
 
-extern "C" void KernelCasSPMSOHM27( unsigned int numberOfThreads, 
+void KernelCasSPMSOHM27( unsigned int numberOfThreads, 
                                    real s9,
                                    unsigned int* bcMatD,
                                    unsigned int* neighborX,
@@ -63,7 +63,7 @@ extern "C" void KernelCasSPMSOHM27( unsigned int numberOfThreads,
                                    int size_Mat,
                                    bool EvenOrOdd);
 
-extern "C" void KernelKumCompSRTSP27(
+void KernelKumCompSRTSP27(
 	unsigned int numberOfThreads,
 	real omega,
 	unsigned int* bcMatD,
@@ -76,7 +76,7 @@ extern "C" void KernelKumCompSRTSP27(
 	real* forces,
 	bool EvenOrOdd);
 
-extern "C" void KernelCumulantD3Q27All4(unsigned int numberOfThreads,
+void KernelCumulantD3Q27All4(unsigned int numberOfThreads,
 									    real s9,
 									    unsigned int* bcMatD,
 									    unsigned int* neighborX,
@@ -88,7 +88,7 @@ extern "C" void KernelCumulantD3Q27All4(unsigned int numberOfThreads,
 									    real* forces,
 									    bool EvenOrOdd);
 
-extern "C" void KernelKumAA2016CompBulkSP27(unsigned int numberOfThreads, 
+void KernelKumAA2016CompBulkSP27(unsigned int numberOfThreads, 
 											real s9,
 											unsigned int* bcMatD,
 											unsigned int* neighborX,
@@ -101,7 +101,7 @@ extern "C" void KernelKumAA2016CompBulkSP27(unsigned int numberOfThreads,
 											real* forces,
 											bool EvenOrOdd);
 
-extern "C" void KernelKum1hSP27(    unsigned int numberOfThreads, 
+void KernelKum1hSP27(    unsigned int numberOfThreads, 
 									real omega,
 									real deltaPhi,
 									real angularVelocity,
@@ -116,7 +116,7 @@ extern "C" void KernelKum1hSP27(    unsigned int numberOfThreads,
 									int size_Mat,
 									bool EvenOrOdd);
 
-extern "C" void KernelCascadeSP27(unsigned int numberOfThreads, 
+void KernelCascadeSP27(unsigned int numberOfThreads, 
 								  real s9,
 								  unsigned int* bcMatD,
 								  unsigned int* neighborX,
@@ -126,7 +126,7 @@ extern "C" void KernelCascadeSP27(unsigned int numberOfThreads,
 								  int size_Mat,
 								  bool EvenOrOdd);
 
-extern "C" void KernelKumNewSP27(   unsigned int numberOfThreads, 
+void KernelKumNewSP27(   unsigned int numberOfThreads, 
 									real s9,
 									unsigned int* bcMatD,
 									unsigned int* neighborX,
@@ -137,7 +137,7 @@ extern "C" void KernelKumNewSP27(   unsigned int numberOfThreads,
 									bool EvenOrOdd);
 
 
-extern "C" void CumulantOnePreconditionedErrorDiffusionChimCompSP27(
+void CumulantOnePreconditionedErrorDiffusionChimCompSP27(
 	unsigned int numberOfThreads,
 	real s9,
 	unsigned int* bcMatD,
@@ -151,7 +151,7 @@ extern "C" void CumulantOnePreconditionedErrorDiffusionChimCompSP27(
 	real* forces,
 	bool EvenOrOdd);
 
-extern "C" void CumulantOnePreconditionedChimCompSP27(
+void CumulantOnePreconditionedChimCompSP27(
 	unsigned int numberOfThreads,
 	real s9,
 	unsigned int* bcMatD,
@@ -165,7 +165,7 @@ extern "C" void CumulantOnePreconditionedChimCompSP27(
 	real* forces,
 	bool EvenOrOdd);
 
-extern "C" void CumulantOneChimCompSP27(
+void CumulantOneChimCompSP27(
 	unsigned int numberOfThreads,
 	real s9,
 	unsigned int* bcMatD,
@@ -180,7 +180,7 @@ extern "C" void CumulantOneChimCompSP27(
 	bool EvenOrOdd);
 
 
-extern "C" void KernelKumIsoTestSP27(unsigned int numberOfThreads, 
+void KernelKumIsoTestSP27(unsigned int numberOfThreads, 
 									 real s9,
 									 unsigned int* bcMatD,
 									 unsigned int* neighborX,
@@ -193,7 +193,7 @@ extern "C" void KernelKumIsoTestSP27(unsigned int numberOfThreads,
 									 int size_Mat,
 									 bool EvenOrOdd);
 
-extern "C" void KernelKumCompSP27(  unsigned int numberOfThreads, 
+void KernelKumCompSP27(  unsigned int numberOfThreads, 
 									real s9,
 									unsigned int* bcMatD,
 									unsigned int* neighborX,
@@ -203,7 +203,7 @@ extern "C" void KernelKumCompSP27(  unsigned int numberOfThreads,
 									int size_Mat,
 									bool EvenOrOdd);
 
-extern "C" void KernelWaleBySoniMalavCumAA2016CompSP27(
+void KernelWaleBySoniMalavCumAA2016CompSP27(
 	unsigned int numberOfThreads,
 	real s9,
 	unsigned int* bcMatD,
@@ -222,7 +222,7 @@ extern "C" void KernelWaleBySoniMalavCumAA2016CompSP27(
 	real* forces,
 	bool EvenOrOdd);
 
-extern "C" void KernelPMCumOneCompSP27(unsigned int numberOfThreads, 
+void KernelPMCumOneCompSP27(unsigned int numberOfThreads, 
 									   real omega,
 									   unsigned int* neighborX,
 									   unsigned int* neighborY,
@@ -238,7 +238,7 @@ extern "C" void KernelPMCumOneCompSP27(unsigned int numberOfThreads,
 									   unsigned int* nodeIdsPorousMedia, 
 									   bool EvenOrOdd);
 
-extern "C" void KernelADincomp7(   unsigned int numberOfThreads, 
+void KernelADincomp7(   unsigned int numberOfThreads, 
 								   real diffusivity,
 								   unsigned int* bcMatD,
 								   unsigned int* neighborX,
@@ -249,7 +249,7 @@ extern "C" void KernelADincomp7(   unsigned int numberOfThreads,
 								   int size_Mat,
 								   bool EvenOrOdd);
 
-extern "C" void KernelADincomp27(   unsigned int numberOfThreads, 
+void KernelADincomp27(   unsigned int numberOfThreads, 
 									real diffusivity,
 									unsigned int* bcMatD,
 									unsigned int* neighborX,
@@ -260,7 +260,7 @@ extern "C" void KernelADincomp27(   unsigned int numberOfThreads,
 									int size_Mat,
 									bool EvenOrOdd);
 
-extern "C" void Init27(int myid,
+void Init27(int myid,
                        int numprocs,
                        real u0,
                        unsigned int* geoD,
@@ -276,7 +276,7 @@ extern "C" void Init27(int myid,
                        int level,
                        int maxlevel);
 
-extern "C" void InitNonEqPartSP27(unsigned int numberOfThreads,
+void InitNonEqPartSP27(unsigned int numberOfThreads,
                                   unsigned int* neighborX,
                                   unsigned int* neighborY,
                                   unsigned int* neighborZ,
@@ -292,7 +292,7 @@ extern "C" void InitNonEqPartSP27(unsigned int numberOfThreads,
                                   bool EvenOrOdd);
 
 
-extern "C" void InitThS7(  unsigned int numberOfThreads,
+void InitThS7(  unsigned int numberOfThreads,
                            unsigned int* neighborX,
                            unsigned int* neighborY,
                            unsigned int* neighborZ,
@@ -305,7 +305,7 @@ extern "C" void InitThS7(  unsigned int numberOfThreads,
                            real* DD7,
                            bool EvenOrOdd);
 
-extern "C" void InitADDev27( unsigned int numberOfThreads,
+void InitADDev27( unsigned int numberOfThreads,
                            unsigned int* neighborX,
                            unsigned int* neighborY,
                            unsigned int* neighborZ,
@@ -318,7 +318,7 @@ extern "C" void InitADDev27( unsigned int numberOfThreads,
                            real* DD27,
                            bool EvenOrOdd);
 
-extern "C" void PostProcessorF3_2018Fehlberg(
+void PostProcessorF3_2018Fehlberg(
 	unsigned int numberOfThreads,
 	real omega,
 	unsigned int* bcMatD,
@@ -336,7 +336,7 @@ extern "C" void PostProcessorF3_2018Fehlberg(
 	real* forces,
 	bool EvenOrOdd);
 
-extern "C" void CalcMac27( real* vxD,
+void CalcMac27( real* vxD,
                           real* vyD,
                           real* vzD,
                           real* rhoD,
@@ -351,7 +351,7 @@ extern "C" void CalcMac27( real* vxD,
                           real* DD,
                           bool isEvenTimestep);
 
-extern "C" void CalcMacSP27(real* vxD,
+void CalcMacSP27(real* vxD,
                             real* vyD,
                             real* vzD,
                             real* rhoD,
@@ -365,7 +365,7 @@ extern "C" void CalcMacSP27(real* vxD,
                             real* DD,
                             bool isEvenTimestep);
 
-extern "C" void CalcMacCompSP27(real* vxD,
+void CalcMacCompSP27(real* vxD,
 								real* vyD,
 								real* vzD,
 								real* rhoD,
@@ -379,7 +379,7 @@ extern "C" void CalcMacCompSP27(real* vxD,
 								real* DD,
 								bool isEvenTimestep);
 
-extern "C" void CalcMacThS7(  real* Conc,
+void CalcMacThS7(  real* Conc,
                               unsigned int* geoD,
                               unsigned int* neighborX,
                               unsigned int* neighborY,
@@ -389,7 +389,7 @@ extern "C" void CalcMacThS7(  real* Conc,
                               real* DD7,
                               bool isEvenTimestep);
 
-extern "C" void PlaneConcThS7(real* Conc,
+void PlaneConcThS7(real* Conc,
 							  int* kPC,
 							  unsigned int numberOfPointskPC,
 							  unsigned int* geoD,
@@ -401,7 +401,7 @@ extern "C" void PlaneConcThS7(real* Conc,
 							  real* DD7,
 							  bool isEvenTimestep);
 
-extern "C" void PlaneConcThS27(real* Conc,
+void PlaneConcThS27(real* Conc,
 							   int* kPC,
 							   unsigned int numberOfPointskPC,
 							   unsigned int* geoD,
@@ -413,7 +413,7 @@ extern "C" void PlaneConcThS27(real* Conc,
 							   real* DD27,
 							   bool isEvenTimestep);
 
-extern "C" void CalcConcentration27( unsigned int numberOfThreads,
+void CalcConcentration27( unsigned int numberOfThreads,
 	                                 real* Conc,
                                      unsigned int* geoD,
                                      unsigned int* neighborX,
@@ -423,7 +423,7 @@ extern "C" void CalcConcentration27( unsigned int numberOfThreads,
                                      real* DD27,
                                      bool isEvenTimestep);
 
-extern "C" void CalcMedSP27(  real* vxD,
+void CalcMedSP27(  real* vxD,
                               real* vyD,
                               real* vzD,
                               real* rhoD,
@@ -437,7 +437,7 @@ extern "C" void CalcMedSP27(  real* vxD,
                               real* DD,
                               bool isEvenTimestep);
 
-extern "C" void CalcMedCompSP27(real* vxD,
+void CalcMedCompSP27(real* vxD,
 								real* vyD,
 								real* vzD,
 								real* rhoD,
@@ -451,7 +451,7 @@ extern "C" void CalcMedCompSP27(real* vxD,
 								real* DD,
 								bool isEvenTimestep);
 
-extern "C" void CalcMedCompAD27(
+void CalcMedCompAD27(
 	real* vxD,
 	real* vyD,
 	real* vzD,
@@ -468,7 +468,7 @@ extern "C" void CalcMedCompAD27(
 	real* DD_AD,
 	bool isEvenTimestep);
 
-extern "C" void CalcMacMedSP27(  real* vxD,
+void CalcMacMedSP27(  real* vxD,
                                  real* vyD,
                                  real* vzD,
                                  real* rhoD,
@@ -482,7 +482,7 @@ extern "C" void CalcMacMedSP27(  real* vxD,
                                  unsigned int numberOfThreads, 
                                  bool isEvenTimestep);
 
-extern "C" void ResetMedianValuesSP27(
+void ResetMedianValuesSP27(
 	real* vxD,
 	real* vyD,
 	real* vzD,
@@ -492,7 +492,7 @@ extern "C" void ResetMedianValuesSP27(
 	unsigned int numberOfThreads,
 	bool isEvenTimestep);
 
-extern "C" void ResetMedianValuesAD27(
+void ResetMedianValuesAD27(
 	real* vxD,
 	real* vyD,
 	real* vzD,
@@ -503,7 +503,7 @@ extern "C" void ResetMedianValuesAD27(
 	unsigned int numberOfThreads,
 	bool isEvenTimestep);
 
-extern "C" void Calc2ndMomentsIncompSP27(real* kxyFromfcNEQ,
+void Calc2ndMomentsIncompSP27(real* kxyFromfcNEQ,
 										 real* kyzFromfcNEQ,
 										 real* kxzFromfcNEQ,
 										 real* kxxMyyFromfcNEQ,
@@ -517,7 +517,7 @@ extern "C" void Calc2ndMomentsIncompSP27(real* kxyFromfcNEQ,
 										 real* DD,
 										 bool isEvenTimestep);
 
-extern "C" void Calc2ndMomentsCompSP27(real* kxyFromfcNEQ,
+void Calc2ndMomentsCompSP27(real* kxyFromfcNEQ,
 									   real* kyzFromfcNEQ,
 									   real* kxzFromfcNEQ,
 									   real* kxxMyyFromfcNEQ,
@@ -531,7 +531,7 @@ extern "C" void Calc2ndMomentsCompSP27(real* kxyFromfcNEQ,
 									   real* DD,
 									   bool isEvenTimestep);
 
-extern "C" void Calc3rdMomentsIncompSP27(real* CUMbbb,
+void Calc3rdMomentsIncompSP27(real* CUMbbb,
 										 real* CUMabc,
 										 real* CUMbac,
 										 real* CUMbca,
@@ -547,7 +547,7 @@ extern "C" void Calc3rdMomentsIncompSP27(real* CUMbbb,
 										 real* DD,
 										 bool isEvenTimestep);
 
-extern "C" void Calc3rdMomentsCompSP27(real* CUMbbb,
+void Calc3rdMomentsCompSP27(real* CUMbbb,
 									   real* CUMabc,
 									   real* CUMbac,
 									   real* CUMbca,
@@ -563,7 +563,7 @@ extern "C" void Calc3rdMomentsCompSP27(real* CUMbbb,
 									   real* DD,
 									   bool isEvenTimestep);
 
-extern "C" void CalcHigherMomentsIncompSP27(real* CUMcbb,
+void CalcHigherMomentsIncompSP27(real* CUMcbb,
 											real* CUMbcb,
 											real* CUMbbc,
 											real* CUMcca,
@@ -582,7 +582,7 @@ extern "C" void CalcHigherMomentsIncompSP27(real* CUMcbb,
 											real* DD,
 											bool isEvenTimestep);
 
-extern "C" void CalcHigherMomentsCompSP27(real* CUMcbb,
+void CalcHigherMomentsCompSP27(real* CUMcbb,
 										  real* CUMbcb,
 										  real* CUMbbc,
 										  real* CUMcca,
@@ -601,7 +601,7 @@ extern "C" void CalcHigherMomentsCompSP27(real* CUMcbb,
 										  real* DD,
 										  bool isEvenTimestep);
 
-extern "C" void LBCalcMeasurePoints27(real* vxMP,
+void LBCalcMeasurePoints27(real* vxMP,
                                       real* vyMP,
                                       real* vzMP,
                                       real* rhoMP,
@@ -618,7 +618,7 @@ extern "C" void LBCalcMeasurePoints27(real* vxMP,
                                       unsigned int numberOfThreads, 
                                       bool isEvenTimestep);
 
-extern "C" void BcPress27(int nx, 
+void BcPress27(int nx, 
                           int ny, 
                           int tz, 
                           unsigned int grid_nx, 
@@ -631,7 +631,7 @@ extern "C" void BcPress27(int nx,
                           unsigned int size_Mat, 
                           bool isEvenTimestep);
 
-extern "C" void BcVel27(int nx, 
+void BcVel27(int nx, 
                         int ny, 
                         int nz, 
                         int itz, 
@@ -647,11 +647,11 @@ extern "C" void BcVel27(int nx,
                         real u0x, 
                         real om);
 
-extern "C" void QDev27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition);
+void QDev27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition);
 
-extern "C" void QDevComp27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition);
+void QDevComp27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition);
 
-extern "C" void QDevCompThinWalls27(unsigned int numberOfThreads,
+void QDevCompThinWalls27(unsigned int numberOfThreads,
 									real* DD, 
 									int* k_Q, 
 									real* QQ,
@@ -665,9 +665,9 @@ extern "C" void QDevCompThinWalls27(unsigned int numberOfThreads,
 									unsigned int size_Mat, 
 									bool isEvenTimestep);
 
-extern "C" void QDev3rdMomentsComp27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition);
+void QDev3rdMomentsComp27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition);
 
-extern "C" void QDevIncompHighNu27(  unsigned int numberOfThreads,
+void QDevIncompHighNu27(  unsigned int numberOfThreads,
 									 real* DD, 
 									 int* k_Q, 
 									 real* QQ,
@@ -679,7 +679,7 @@ extern "C" void QDevIncompHighNu27(  unsigned int numberOfThreads,
 									 unsigned int size_Mat, 
 									 bool isEvenTimestep);
 
-extern "C" void QDevCompHighNu27(unsigned int numberOfThreads,
+void QDevCompHighNu27(unsigned int numberOfThreads,
 								 real* DD, 
 								 int* k_Q, 
 								 real* QQ,
@@ -691,9 +691,9 @@ extern "C" void QDevCompHighNu27(unsigned int numberOfThreads,
 								 unsigned int size_Mat, 
 								 bool isEvenTimestep);
 
-extern "C" void QVelDevicePlainBB27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition);
+void QVelDevicePlainBB27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition);
 	
-extern "C" void QVelDeviceCouette27(unsigned int numberOfThreads,
+void QVelDeviceCouette27(unsigned int numberOfThreads,
 									real* vx,
 									real* vy,
 									real* vz,
@@ -708,7 +708,7 @@ extern "C" void QVelDeviceCouette27(unsigned int numberOfThreads,
 									unsigned int size_Mat, 
 									bool isEvenTimestep);
 
-extern "C" void QVelDevice1h27( unsigned int numberOfThreads,
+void QVelDevice1h27( unsigned int numberOfThreads,
 								int nx,
 								int ny,
 								real* vx,
@@ -730,9 +730,9 @@ extern "C" void QVelDevice1h27( unsigned int numberOfThreads,
 								unsigned int size_Mat, 
 								bool isEvenTimestep);
 
-extern "C" void QVelDev27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition);
+void QVelDev27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition);
 
-extern "C" void QVelDevCompPlusSlip27(unsigned int numberOfThreads,
+void QVelDevCompPlusSlip27(unsigned int numberOfThreads,
 									  real* vx,
 									  real* vy,
 									  real* vz,
@@ -747,9 +747,9 @@ extern "C" void QVelDevCompPlusSlip27(unsigned int numberOfThreads,
 									  unsigned int size_Mat, 
 									  bool isEvenTimestep);
 
-extern "C" void QVelDevComp27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition);
+void QVelDevComp27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition);
 
-extern "C" void QVelDevCompThinWalls27(unsigned int numberOfThreads,
+void QVelDevCompThinWalls27(unsigned int numberOfThreads,
 							           real* vx,
 							           real* vy,
 							           real* vz,
@@ -766,9 +766,9 @@ extern "C" void QVelDevCompThinWalls27(unsigned int numberOfThreads,
 							           unsigned int size_Mat, 
 							           bool isEvenTimestep);
 
-extern "C" void QVelDevCompZeroPress27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition);
+void QVelDevCompZeroPress27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition);
 
-extern "C" void QVelDevIncompHighNu27(  unsigned int numberOfThreads,
+void QVelDevIncompHighNu27(  unsigned int numberOfThreads,
 										real* vx,
 										real* vy,
 										real* vz,
@@ -783,7 +783,7 @@ extern "C" void QVelDevIncompHighNu27(  unsigned int numberOfThreads,
 										unsigned int size_Mat, 
 										bool isEvenTimestep);
 
-extern "C" void QVelDevCompHighNu27(unsigned int numberOfThreads,
+void QVelDevCompHighNu27(unsigned int numberOfThreads,
 									real* vx,
 									real* vy,
 									real* vz,
@@ -798,7 +798,7 @@ extern "C" void QVelDevCompHighNu27(unsigned int numberOfThreads,
 									unsigned int size_Mat, 
 									bool isEvenTimestep);
 
-extern "C" void QVeloDevEQ27(unsigned int numberOfThreads,
+void QVeloDevEQ27(unsigned int numberOfThreads,
 							 real* VeloX,
 							 real* VeloY,
 							 real* VeloZ,
@@ -812,7 +812,7 @@ extern "C" void QVeloDevEQ27(unsigned int numberOfThreads,
 							 unsigned int size_Mat, 
 							 bool isEvenTimestep);
 
-extern "C" void QVeloStreetDevEQ27(
+void QVeloStreetDevEQ27(
 	uint  numberOfThreads,
 	real* veloXfraction,
 	real* veloYfraction,
@@ -827,13 +827,13 @@ extern "C" void QVeloStreetDevEQ27(
 	uint  size_Mat,
 	bool  isEvenTimestep);
 
-extern "C" void QSlipDev27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition);
+void QSlipDev27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition);
 
-extern "C" void QSlipDevComp27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition);
+void QSlipDevComp27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition);
 
-extern "C" void QSlipDevCompTurbulentViscosity27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition);
+void QSlipDevCompTurbulentViscosity27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition);
 
-extern "C" void QSlipGeomDevComp27( unsigned int numberOfThreads,
+void QSlipGeomDevComp27( unsigned int numberOfThreads,
 									real* DD, 
 									int* k_Q, 
 									real* QQ,
@@ -848,7 +848,7 @@ extern "C" void QSlipGeomDevComp27( unsigned int numberOfThreads,
 									unsigned int size_Mat, 
 									bool isEvenTimestep);
 
-extern "C" void QSlipNormDevComp27(unsigned int numberOfThreads,
+void QSlipNormDevComp27(unsigned int numberOfThreads,
 								   real* DD, 
 								   int* k_Q, 
 								   real* QQ,
@@ -863,13 +863,13 @@ extern "C" void QSlipNormDevComp27(unsigned int numberOfThreads,
 								   unsigned int size_Mat, 
 								   bool isEvenTimestep);
 
-extern "C" void QStressDevComp27(Parameter *para,  QforBoundaryConditions* boundaryCondition, const int level);
+void QStressDevComp27(Parameter *para,  QforBoundaryConditions* boundaryCondition, const int level);
 
-extern "C" void BBStressDev27(Parameter *para,  QforBoundaryConditions* boundaryCondition, const int level);
+void BBStressDev27(Parameter *para,  QforBoundaryConditions* boundaryCondition, const int level);
 
-extern "C" void QPressDev27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition);
+void QPressDev27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition);
 
-extern "C" void QPressDevFixBackflow27(unsigned int numberOfThreads,
+void QPressDevFixBackflow27(unsigned int numberOfThreads,
                                        real* rhoBC,
                                        real* DD, 
                                        int* k_Q, 
@@ -881,7 +881,7 @@ extern "C" void QPressDevFixBackflow27(unsigned int numberOfThreads,
                                        unsigned int size_Mat, 
                                        bool isEvenTimestep);
 
-extern "C" void QPressDevDirDepBot27(unsigned int numberOfThreads,
+void QPressDevDirDepBot27(unsigned int numberOfThreads,
                                      real* rhoBC,
                                      real* DD, 
                                      int* k_Q, 
@@ -893,11 +893,11 @@ extern "C" void QPressDevDirDepBot27(unsigned int numberOfThreads,
                                      unsigned int size_Mat, 
                                      bool isEvenTimestep);
 
-extern "C" void QPressNoRhoDev27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition);
+void QPressNoRhoDev27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition);
 
-extern "C" void QInflowScaleByPressDev27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition);
+void QInflowScaleByPressDev27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition);
 
-extern "C" void QPressDevOld27(unsigned int numberOfThreads,
+void QPressDevOld27(unsigned int numberOfThreads,
                                real* rhoBC,
                                real* DD, 
                                int* k_Q, 
@@ -910,13 +910,13 @@ extern "C" void QPressDevOld27(unsigned int numberOfThreads,
                                unsigned int size_Mat, 
                                bool isEvenTimestep);
 
-extern "C" void QPressDevIncompNEQ27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition);
+void QPressDevIncompNEQ27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition);
 
-extern "C" void QPressDevNEQ27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition);
+void QPressDevNEQ27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition);
 
-extern "C" void QPressDevEQZ27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition);
+void QPressDevEQZ27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition);
 
-extern "C" void QPressDevZero27(unsigned int numberOfThreads,
+void QPressDevZero27(unsigned int numberOfThreads,
                                 real* DD, 
                                 int* k_Q, 
                                 unsigned int numberOfBCnodes, 
@@ -926,7 +926,7 @@ extern "C" void QPressDevZero27(unsigned int numberOfThreads,
                                 unsigned int size_Mat, 
                                 bool isEvenTimestep);
 
-extern "C" void QPressDevFake27(   unsigned int numberOfThreads,
+void QPressDevFake27(   unsigned int numberOfThreads,
 								   real* rhoBC,
 								   real* DD, 
 								   int* k_Q, 
@@ -939,9 +939,9 @@ extern "C" void QPressDevFake27(   unsigned int numberOfThreads,
 								   unsigned int size_Mat, 
 								   bool isEvenTimestep);
 
-extern "C" void BBDev27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition);
+void BBDev27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition);
 
-extern "C" void QPressDev27_IntBB(  unsigned int numberOfThreads,
+void QPressDev27_IntBB(  unsigned int numberOfThreads,
 									real* rho,
 									real* DD, 
 									int* k_Q, 
@@ -954,7 +954,7 @@ extern "C" void QPressDev27_IntBB(  unsigned int numberOfThreads,
 									unsigned int size_Mat, 
 									bool isEvenTimestep);
 
-extern "C" void QPressDevAntiBB27(  unsigned int numberOfThreads,
+void QPressDevAntiBB27(  unsigned int numberOfThreads,
 								  real* rhoBC,
 								  real* vx,
 								  real* vy,
@@ -970,7 +970,7 @@ extern "C" void QPressDevAntiBB27(  unsigned int numberOfThreads,
 								  unsigned int size_Mat, 
 								  bool isEvenTimestep);
 
-extern "C" void PressSchlaffer27(unsigned int numberOfThreads,
+void PressSchlaffer27(unsigned int numberOfThreads,
                                  real* rhoBC,
                                  real* DD,
                                  real* vx0,
@@ -987,7 +987,7 @@ extern "C" void PressSchlaffer27(unsigned int numberOfThreads,
                                  unsigned int size_Mat, 
                                  bool isEvenTimestep);
 
-extern "C" void VelSchlaffer27(  unsigned int numberOfThreads,
+void VelSchlaffer27(  unsigned int numberOfThreads,
                                  int t,
                                  real* DD,
                                  real* vz0,
@@ -1002,7 +1002,7 @@ extern "C" void VelSchlaffer27(  unsigned int numberOfThreads,
                                  unsigned int size_Mat, 
                                  bool isEvenTimestep);
 
-extern "C" void QADDev7(unsigned int numberOfThreads,
+void QADDev7(unsigned int numberOfThreads,
                         real* DD, 
                         real* DD7,
                         real* temp,
@@ -1019,7 +1019,7 @@ extern "C" void QADDev7(unsigned int numberOfThreads,
 
 //////////////////////////////////////////////////////////////////////////
 //! \brief Advection Diffusion kernel
-extern "C" void FactorizedCentralMomentsAdvectionDiffusionDeviceKernel(
+void FactorizedCentralMomentsAdvectionDiffusionDeviceKernel(
 	uint numberOfThreads,
 	real omegaDiffusivity,
 	uint* typeOfGridNode,
@@ -1034,7 +1034,7 @@ extern "C" void FactorizedCentralMomentsAdvectionDiffusionDeviceKernel(
 
 //////////////////////////////////////////////////////////////////////////
 //! \brief defines the behavior of a slip-AD boundary condition
-extern "C" void ADSlipVelDevComp(
+void ADSlipVelDevComp(
 	uint numberOfThreads,
 	real * normalX,
 	real * normalY,
@@ -1051,7 +1051,7 @@ extern "C" void ADSlipVelDevComp(
 	uint size_Mat,
 	bool isEvenTimestep);
 	
-extern "C" void QADDirichletDev27( unsigned int numberOfThreads,
+void QADDirichletDev27( unsigned int numberOfThreads,
 								   real* DD, 
 								   real* DD27,
 								   real* temp,
@@ -1066,7 +1066,7 @@ extern "C" void QADDirichletDev27( unsigned int numberOfThreads,
 								   unsigned int size_Mat, 
 								   bool isEvenTimestep);
 
-extern "C" void QADBBDev27(  unsigned int numberOfThreads,
+void QADBBDev27(  unsigned int numberOfThreads,
 							 real* DD, 
 							 real* DD27,
 							 real* temp,
@@ -1081,7 +1081,7 @@ extern "C" void QADBBDev27(  unsigned int numberOfThreads,
 							 unsigned int size_Mat, 
 							 bool isEvenTimestep);
 
-extern "C" void QADVelDev7(unsigned int numberOfThreads,
+void QADVelDev7(unsigned int numberOfThreads,
                            real* DD, 
                            real* DD7,
                            real* temp,
@@ -1098,7 +1098,7 @@ extern "C" void QADVelDev7(unsigned int numberOfThreads,
                            bool isEvenTimestep);
 
 
-extern "C" void QADVelDev27(  unsigned int numberOfThreads,
+void QADVelDev27(  unsigned int numberOfThreads,
                               real* DD, 
                               real* DD27,
                               real* temp,
@@ -1114,7 +1114,7 @@ extern "C" void QADVelDev27(  unsigned int numberOfThreads,
                               unsigned int size_Mat, 
                               bool isEvenTimestep);
 
-extern "C" void QADPressDev7( unsigned int numberOfThreads,
+void QADPressDev7( unsigned int numberOfThreads,
                               real* DD, 
                               real* DD7,
                               real* temp,
@@ -1130,7 +1130,7 @@ extern "C" void QADPressDev7( unsigned int numberOfThreads,
                               unsigned int size_Mat, 
                               bool isEvenTimestep);
 
-extern "C" void QADPressDev27(unsigned int numberOfThreads,
+void QADPressDev27(unsigned int numberOfThreads,
                               real* DD, 
                               real* DD27,
                               real* temp,
@@ -1146,7 +1146,7 @@ extern "C" void QADPressDev27(unsigned int numberOfThreads,
                               unsigned int size_Mat, 
                               bool isEvenTimestep);
 
-extern "C" void QADPressNEQNeighborDev27(
+void QADPressNEQNeighborDev27(
 											unsigned int numberOfThreads,
 											real* DD,
 											real* DD27,
@@ -1160,7 +1160,7 @@ extern "C" void QADPressNEQNeighborDev27(
 											bool isEvenTimestep
 										);
 
-extern "C" void QNoSlipADincompDev7(unsigned int numberOfThreads,
+void QNoSlipADincompDev7(unsigned int numberOfThreads,
 									real* DD, 
 									real* DD7,
 									real* temp,
@@ -1175,7 +1175,7 @@ extern "C" void QNoSlipADincompDev7(unsigned int numberOfThreads,
 									unsigned int size_Mat, 
 									bool isEvenTimestep);
 
-extern "C" void QNoSlipADincompDev27(unsigned int numberOfThreads,
+void QNoSlipADincompDev27(unsigned int numberOfThreads,
 									 real* DD, 
 									 real* DD27,
 									 real* temp,
@@ -1190,7 +1190,7 @@ extern "C" void QNoSlipADincompDev27(unsigned int numberOfThreads,
 									 unsigned int size_Mat, 
 									 bool isEvenTimestep);
 
-extern "C" void QADVeloIncompDev7( unsigned int numberOfThreads,
+void QADVeloIncompDev7( unsigned int numberOfThreads,
 								   real* DD, 
 								   real* DD7,
 								   real* temp,
@@ -1207,7 +1207,7 @@ extern "C" void QADVeloIncompDev7( unsigned int numberOfThreads,
 								   bool isEvenTimestep);
 
 
-extern "C" void QADVeloIncompDev27( unsigned int numberOfThreads,
+void QADVeloIncompDev27( unsigned int numberOfThreads,
 									real* DD, 
 									real* DD27,
 									real* temp,
@@ -1223,7 +1223,7 @@ extern "C" void QADVeloIncompDev27( unsigned int numberOfThreads,
 									unsigned int size_Mat, 
 									bool isEvenTimestep);
 
-extern "C" void QADPressIncompDev7(  unsigned int numberOfThreads,
+void QADPressIncompDev7(  unsigned int numberOfThreads,
 									 real* DD, 
 									 real* DD7,
 									 real* temp,
@@ -1239,7 +1239,7 @@ extern "C" void QADPressIncompDev7(  unsigned int numberOfThreads,
 									 unsigned int size_Mat, 
 									 bool isEvenTimestep);
 
-extern "C" void QADPressIncompDev27(  unsigned int numberOfThreads,
+void QADPressIncompDev27(  unsigned int numberOfThreads,
 									  real* DD, 
 									  real* DD27,
 									  real* temp,
@@ -1255,7 +1255,7 @@ extern "C" void QADPressIncompDev27(  unsigned int numberOfThreads,
 									  unsigned int size_Mat, 
 									  bool isEvenTimestep);
 
-extern "C" void PropVelo(   unsigned int numberOfThreads,
+void PropVelo(   unsigned int numberOfThreads,
 							unsigned int* neighborX,
 							unsigned int* neighborY,
 							unsigned int* neighborZ,
@@ -1270,7 +1270,7 @@ extern "C" void PropVelo(   unsigned int numberOfThreads,
 							real* DD,
 							bool EvenOrOdd);
 
-extern "C" void ScaleCF27( real* DC, 
+void ScaleCF27( real* DC, 
                            real* DF, 
                            unsigned int* neighborCX,
                            unsigned int* neighborCY,
@@ -1293,7 +1293,7 @@ extern "C" void ScaleCF27( real* DC,
                            unsigned int nyF,
                            unsigned int numberOfThreads);
 
-extern "C" void ScaleFC27( real* DC, 
+void ScaleFC27( real* DC, 
                            real* DF, 
                            unsigned int* neighborCX,
                            unsigned int* neighborCY,
@@ -1316,7 +1316,7 @@ extern "C" void ScaleFC27( real* DC,
                            unsigned int nyF,
                            unsigned int numberOfThreads);
 
-extern "C" void ScaleCFEff27(real* DC, 
+void ScaleCFEff27(real* DC, 
                              real* DF, 
                              unsigned int* neighborCX,
                              unsigned int* neighborCY,
@@ -1340,7 +1340,7 @@ extern "C" void ScaleCFEff27(real* DC,
                              unsigned int numberOfThreads,
                              OffCF offCF);
 
-extern "C" void ScaleFCEff27(real* DC, 
+void ScaleFCEff27(real* DC, 
                              real* DF, 
                              unsigned int* neighborCX,
                              unsigned int* neighborCY,
@@ -1364,7 +1364,7 @@ extern "C" void ScaleFCEff27(real* DC,
                              unsigned int numberOfThreads,
                              OffFC offFC);
 
-extern "C" void ScaleCFLast27(real* DC, 
+void ScaleCFLast27(real* DC, 
                               real* DF, 
                               unsigned int* neighborCX,
                               unsigned int* neighborCY,
@@ -1388,7 +1388,7 @@ extern "C" void ScaleCFLast27(real* DC,
                               unsigned int numberOfThreads,
                               OffCF offCF);
 
-extern "C" void ScaleFCLast27(real* DC, 
+void ScaleFCLast27(real* DC, 
                               real* DF, 
                               unsigned int* neighborCX,
                               unsigned int* neighborCY,
@@ -1412,7 +1412,7 @@ extern "C" void ScaleFCLast27(real* DC,
                               unsigned int numberOfThreads,
                               OffFC offFC);
 
-extern "C" void ScaleCFpress27(real* DC, 
+void ScaleCFpress27(real* DC, 
                               real* DF, 
                               unsigned int* neighborCX,
                               unsigned int* neighborCY,
@@ -1436,7 +1436,7 @@ extern "C" void ScaleCFpress27(real* DC,
                               unsigned int numberOfThreads,
                               OffCF offCF);
 
-extern "C" void ScaleFCpress27(  real* DC, 
+void ScaleFCpress27(  real* DC, 
                                  real* DF, 
                                  unsigned int* neighborCX,
                                  unsigned int* neighborCY,
@@ -1460,7 +1460,7 @@ extern "C" void ScaleFCpress27(  real* DC,
                                  unsigned int numberOfThreads,
                                  OffFC offFC);
 
-extern "C" void ScaleCF_Fix_27(real* DC, 
+void ScaleCF_Fix_27(real* DC, 
                               real* DF, 
                               unsigned int* neighborCX,
                               unsigned int* neighborCY,
@@ -1484,7 +1484,7 @@ extern "C" void ScaleCF_Fix_27(real* DC,
                               unsigned int numberOfThreads,
                               OffCF offCF);
 
-extern "C" void ScaleCF_Fix_comp_27(   real* DC, 
+void ScaleCF_Fix_comp_27(   real* DC, 
 									   real* DF, 
 									   unsigned int* neighborCX,
 									   unsigned int* neighborCY,
@@ -1508,7 +1508,7 @@ extern "C" void ScaleCF_Fix_comp_27(   real* DC,
 									   unsigned int numberOfThreads,
 									   OffCF offCF);
 
-extern "C" void ScaleCF_0817_comp_27(  real* DC, 
+void ScaleCF_0817_comp_27(  real* DC, 
 									   real* DF, 
 									   unsigned int* neighborCX,
 									   unsigned int* neighborCY,
@@ -1533,7 +1533,7 @@ extern "C" void ScaleCF_0817_comp_27(  real* DC,
 									   OffCF offCF,
 									   CUstream_st* stream);
 
-extern "C" void ScaleCF_comp_D3Q27F3_2018(	real* DC,
+void ScaleCF_comp_D3Q27F3_2018(	real* DC,
 											real* DF,
 											real* G6, 
 											unsigned int* neighborCX,
@@ -1558,7 +1558,7 @@ extern "C" void ScaleCF_comp_D3Q27F3_2018(	real* DC,
 											unsigned int numberOfThreads,
 											OffCF offCF);
 
-extern "C" void ScaleCF_comp_D3Q27F3(real* DC,
+void ScaleCF_comp_D3Q27F3(real* DC,
 									 real* DF,
 									 real* G6, 
 									 unsigned int* neighborCX,
@@ -1584,7 +1584,7 @@ extern "C" void ScaleCF_comp_D3Q27F3(real* DC,
 									 OffCF offCF,
 									 CUstream_st *stream);
 
-extern "C" void ScaleCF_staggered_time_comp_27( real* DC, 
+void ScaleCF_staggered_time_comp_27( real* DC, 
 												real* DF, 
 												unsigned int* neighborCX,
 												unsigned int* neighborCY,
@@ -1608,7 +1608,7 @@ extern "C" void ScaleCF_staggered_time_comp_27( real* DC,
 												unsigned int numberOfThreads,
 												OffCF offCF);
 
-extern "C" void ScaleCF_RhoSq_comp_27(  real* DC, 
+void ScaleCF_RhoSq_comp_27(  real* DC, 
 										real* DF, 
 										unsigned int* neighborCX,
 										unsigned int* neighborCY,
@@ -1633,7 +1633,7 @@ extern "C" void ScaleCF_RhoSq_comp_27(  real* DC,
 										OffCF offCF,
                                         CUstream_st *stream);
 
-extern "C" void ScaleCF_RhoSq_3rdMom_comp_27( real* DC, 
+void ScaleCF_RhoSq_3rdMom_comp_27( real* DC, 
 											  real* DF, 
 											  unsigned int* neighborCX,
 											  unsigned int* neighborCY,
@@ -1658,7 +1658,7 @@ extern "C" void ScaleCF_RhoSq_3rdMom_comp_27( real* DC,
 											  OffCF offCF,
 											  CUstream_st *stream);
 
-extern "C" void ScaleCF_AA2016_comp_27( real* DC, 
+void ScaleCF_AA2016_comp_27( real* DC, 
 										real* DF, 
 										unsigned int* neighborCX,
 										unsigned int* neighborCY,
@@ -1683,7 +1683,7 @@ extern "C" void ScaleCF_AA2016_comp_27( real* DC,
 										OffCF offCF,
 										CUstream_st *stream);
 
-extern "C" void ScaleCF_NSPress_27(real* DC, 
+void ScaleCF_NSPress_27(real* DC, 
 								  real* DF, 
 								  unsigned int* neighborCX,
 								  unsigned int* neighborCY,
@@ -1707,7 +1707,7 @@ extern "C" void ScaleCF_NSPress_27(real* DC,
 								  unsigned int numberOfThreads,
 								  OffCF offCF);
 
-extern "C" void ScaleFC_Fix_27(  real* DC, 
+void ScaleFC_Fix_27(  real* DC, 
                                  real* DF, 
                                  unsigned int* neighborCX,
                                  unsigned int* neighborCY,
@@ -1731,7 +1731,7 @@ extern "C" void ScaleFC_Fix_27(  real* DC,
                                  unsigned int numberOfThreads,
                                  OffFC offFC);
 
-extern "C" void ScaleFC_Fix_comp_27(   real* DC, 
+void ScaleFC_Fix_comp_27(   real* DC, 
 									   real* DF, 
 									   unsigned int* neighborCX,
 									   unsigned int* neighborCY,
@@ -1755,7 +1755,7 @@ extern "C" void ScaleFC_Fix_comp_27(   real* DC,
 									   unsigned int numberOfThreads,
 									   OffFC offFC);
 
-extern "C" void ScaleFC_0817_comp_27(  real* DC, 
+void ScaleFC_0817_comp_27(  real* DC, 
 									   real* DF, 
 									   unsigned int* neighborCX,
 									   unsigned int* neighborCY,
@@ -1780,7 +1780,7 @@ extern "C" void ScaleFC_0817_comp_27(  real* DC,
 									   OffFC offFC,
 									   CUstream_st *stream);
 
-extern "C" void ScaleFC_comp_D3Q27F3_2018(real* DC,
+void ScaleFC_comp_D3Q27F3_2018(real* DC,
 										  real* DF,
 										  real* G6,
 										  unsigned int* neighborCX,
@@ -1805,7 +1805,7 @@ extern "C" void ScaleFC_comp_D3Q27F3_2018(real* DC,
 										  unsigned int numberOfThreads,
 										  OffFC offFC);
 
-extern "C" void ScaleFC_comp_D3Q27F3( real* DC,
+void ScaleFC_comp_D3Q27F3( real* DC,
 									  real* DF,
 									  real* G6,
 									  unsigned int* neighborCX,
@@ -1831,7 +1831,7 @@ extern "C" void ScaleFC_comp_D3Q27F3( real* DC,
 									  OffFC offFC,
 									  CUstream_st *stream);
 
-extern "C" void ScaleFC_staggered_time_comp_27( real* DC, 
+void ScaleFC_staggered_time_comp_27( real* DC, 
 												real* DF, 
 												unsigned int* neighborCX,
 												unsigned int* neighborCY,
@@ -1855,7 +1855,7 @@ extern "C" void ScaleFC_staggered_time_comp_27( real* DC,
 												unsigned int numberOfThreads,
 												OffFC offFC);
 
-extern "C" void ScaleFC_RhoSq_comp_27(  real* DC, 
+void ScaleFC_RhoSq_comp_27(  real* DC, 
 										real* DF, 
 										unsigned int* neighborCX,
 										unsigned int* neighborCY,
@@ -1880,7 +1880,7 @@ extern "C" void ScaleFC_RhoSq_comp_27(  real* DC,
 	                                    OffFC offFC,
                                         CUstream_st *stream);
 
-extern "C" void ScaleFC_RhoSq_3rdMom_comp_27( real* DC, 
+void ScaleFC_RhoSq_3rdMom_comp_27( real* DC, 
 											  real* DF, 
 											  unsigned int* neighborCX,
 											  unsigned int* neighborCY,
@@ -1905,7 +1905,7 @@ extern "C" void ScaleFC_RhoSq_3rdMom_comp_27( real* DC,
 											  OffFC offFC,
 											  CUstream_st *stream);
 
-extern "C" void ScaleFC_AA2016_comp_27( real* DC, 
+void ScaleFC_AA2016_comp_27( real* DC, 
 										real* DF, 
 										unsigned int* neighborCX,
 										unsigned int* neighborCY,
@@ -1930,7 +1930,7 @@ extern "C" void ScaleFC_AA2016_comp_27( real* DC,
 										OffFC offFC,
 										CUstream_st *stream);
 
-extern "C" void ScaleFC_NSPress_27(  real* DC, 
+void ScaleFC_NSPress_27(  real* DC, 
 									 real* DF, 
 									 unsigned int* neighborCX,
 									 unsigned int* neighborCY,
@@ -1954,7 +1954,7 @@ extern "C" void ScaleFC_NSPress_27(  real* DC,
 									 unsigned int numberOfThreads,
 									 OffFC offFC);
 
-extern "C" void ScaleCFThS7(  real* DC, 
+void ScaleCFThS7(  real* DC, 
                               real* DF, 
                               real* DD7C, 
                               real* DD7F,
@@ -1974,7 +1974,7 @@ extern "C" void ScaleCFThS7(  real* DC,
                               real diffusivity_fine,
                               unsigned int numberOfThreads);
 
-extern "C" void ScaleFCThS7(  real* DC, 
+void ScaleFCThS7(  real* DC, 
                               real* DF,
                               real* DD7C, 
                               real* DD7F,
@@ -1994,7 +1994,7 @@ extern "C" void ScaleFCThS7(  real* DC,
                               real diffusivity_coarse,
                               unsigned int numberOfThreads);
 
-extern "C" void ScaleCFThSMG7(   real* DC, 
+void ScaleCFThSMG7(   real* DC, 
                                  real* DF,
                                  real* DD7C, 
                                  real* DD7F,
@@ -2015,7 +2015,7 @@ extern "C" void ScaleCFThSMG7(   real* DC,
                                  unsigned int numberOfThreads,
                                  OffCF offCF);
 
-extern "C" void ScaleFCThSMG7(real* DC, 
+void ScaleFCThSMG7(real* DC, 
                               real* DF,
                               real* DD7C, 
                               real* DD7F,
@@ -2036,7 +2036,7 @@ extern "C" void ScaleFCThSMG7(real* DC,
                               unsigned int numberOfThreads,
                               OffFC offFC);
 
-extern "C" void ScaleCFThS27( real* DC, 
+void ScaleCFThS27( real* DC, 
                               real* DF, 
                               real* DD27C, 
                               real* DD27F,
@@ -2057,7 +2057,7 @@ extern "C" void ScaleCFThS27( real* DC,
 							  unsigned int numberOfThreads,
 							  OffCF offCF);
 
-extern "C" void ScaleFCThS27( real* DC, 
+void ScaleFCThS27( real* DC, 
                               real* DF,
                               real* DD27C, 
                               real* DD27F,
@@ -2078,7 +2078,7 @@ extern "C" void ScaleFCThS27( real* DC,
 							  unsigned int numberOfThreads,
 							  OffFC offFC);
 
-extern "C" void DragLiftPostD27(real* DD, 
+void DragLiftPostD27(real* DD, 
 								int* k_Q, 
 								real* QQ,
 								int numberOfBCnodes, 
@@ -2092,7 +2092,7 @@ extern "C" void DragLiftPostD27(real* DD,
 								bool isEvenTimestep,
 								unsigned int numberOfThreads);
 
-extern "C" void DragLiftPreD27( real* DD, 
+void DragLiftPreD27( real* DD, 
 								int* k_Q, 
 								real* QQ,
 								int numberOfBCnodes, 
@@ -2106,7 +2106,7 @@ extern "C" void DragLiftPreD27( real* DD,
 								bool isEvenTimestep,
 								unsigned int numberOfThreads);
 
-extern "C" void CalcCPtop27(real* DD, 
+void CalcCPtop27(real* DD, 
 							int* cpIndex, 
 							int nonCp, 
 							double *cpPress,
@@ -2117,7 +2117,7 @@ extern "C" void CalcCPtop27(real* DD,
 							bool isEvenTimestep,
 							unsigned int numberOfThreads);
 
-extern "C" void CalcCPbottom27(real* DD, 
+void CalcCPbottom27(real* DD, 
 							   int* cpIndex, 
 							   int nonCp, 
 							   double *cpPress,
@@ -2128,7 +2128,7 @@ extern "C" void CalcCPbottom27(real* DD,
 							   bool isEvenTimestep,
 							   unsigned int numberOfThreads);
 
-extern "C" void GetSendFsPreDev27(real* DD,
+void GetSendFsPreDev27(real* DD,
 								  real* bufferFs,
 								  int* sendIndex,
 								  int buffmax,
@@ -2140,7 +2140,7 @@ extern "C" void GetSendFsPreDev27(real* DD,
 								  unsigned int numberOfThreads, 
 	                              cudaStream_t stream = CU_STREAM_LEGACY);
 
-extern "C" void GetSendFsPostDev27(real* DD,
+void GetSendFsPostDev27(real* DD,
 								   real* bufferFs,
 								   int* sendIndex,
 								   int buffmax,
@@ -2152,7 +2152,7 @@ extern "C" void GetSendFsPostDev27(real* DD,
 								   unsigned int numberOfThreads, 
 	                               cudaStream_t stream = CU_STREAM_LEGACY);
 
-extern "C" void SetRecvFsPreDev27(real* DD,
+void SetRecvFsPreDev27(real* DD,
 								  real* bufferFs,
 								  int* recvIndex,
 								  int buffmax,
@@ -2163,7 +2163,7 @@ extern "C" void SetRecvFsPreDev27(real* DD,
 								  bool isEvenTimestep, unsigned int numberOfThreads, 
 	                              cudaStream_t stream = CU_STREAM_LEGACY);
 
-extern "C" void SetRecvFsPostDev27(real* DD,
+void SetRecvFsPostDev27(real* DD,
 								   real* bufferFs,
 								   int* recvIndex,
 								   int buffmax,
@@ -2175,7 +2175,7 @@ extern "C" void SetRecvFsPostDev27(real* DD,
 								   unsigned int numberOfThreads,
                                    cudaStream_t stream = CU_STREAM_LEGACY);
 
-extern "C" void getSendGsDevF3(
+void getSendGsDevF3(
 	real* G6,
 	real* bufferGs,
 	int* sendIndex,
@@ -2187,7 +2187,7 @@ extern "C" void getSendGsDevF3(
 	bool isEvenTimestep,
 	unsigned int numberOfThreads);
 
-extern "C" void setRecvGsDevF3(
+void setRecvGsDevF3(
 	real* G6,
 	real* bufferGs,
 	int* recvIndex,
@@ -2199,7 +2199,7 @@ extern "C" void setRecvGsDevF3(
 	bool isEvenTimestep,
 	unsigned int numberOfThreads);
 
-extern "C" void WallFuncDev27(unsigned int numberOfThreads,
+void WallFuncDev27(unsigned int numberOfThreads,
 							  real* vx,
 							  real* vy,
 							  real* vz,
@@ -2214,7 +2214,7 @@ extern "C" void WallFuncDev27(unsigned int numberOfThreads,
 							  unsigned int size_Mat, 
 							  bool isEvenTimestep);
 
-extern "C" void SetOutputWallVelocitySP27(unsigned int numberOfThreads,
+void SetOutputWallVelocitySP27(unsigned int numberOfThreads,
 										  real* vxD,
 										  real* vyD,
 										  real* vzD,
@@ -2233,7 +2233,7 @@ extern "C" void SetOutputWallVelocitySP27(unsigned int numberOfThreads,
 										  real* DD,
 										  bool isEvenTimestep);
 
-extern "C" void GetVelotoForce27(unsigned int numberOfThreads,
+void GetVelotoForce27(unsigned int numberOfThreads,
 								 real* DD, 
 								 int* bcIndex, 
 								 int nonAtBC, 
@@ -2246,7 +2246,7 @@ extern "C" void GetVelotoForce27(unsigned int numberOfThreads,
 								 unsigned int size_Mat, 
 								 bool isEvenTimestep);
 
-extern "C" void InitParticlesDevice(real* coordX,
+void InitParticlesDevice(real* coordX,
 									real* coordY,
 									real* coordZ, 
 									real* coordParticleXlocal,
@@ -2271,7 +2271,7 @@ extern "C" void InitParticlesDevice(real* coordX,
 									unsigned int size_Mat,
 									unsigned int numberOfThreads);
 
-extern "C" void MoveParticlesDevice(real* coordX,
+void MoveParticlesDevice(real* coordX,
 									real* coordY,
 									real* coordZ, 
 									real* coordParticleXlocal,
@@ -2300,16 +2300,16 @@ extern "C" void MoveParticlesDevice(real* coordX,
 									unsigned int numberOfThreads,
 									bool isEvenTimestep);
 
-extern "C" void initRandomDevice(curandState* state,
+void initRandomDevice(curandState* state,
 								 unsigned int size_Mat,
 								 unsigned int numberOfThreads);
 
-extern "C" void generateRandomValuesDevice(curandState* state,
+void generateRandomValuesDevice(curandState* state,
 										   unsigned int size_Mat,
 										   real* randArray,
 										   unsigned int numberOfThreads);
 
-extern "C" void CalcTurbulenceIntensityDevice(
+void CalcTurbulenceIntensityDevice(
    real* vxx,
    real* vyy,
    real* vzz,
diff --git a/src/gpu/VirtualFluids_GPU/GPU/GPU_Kernels.cuh b/src/gpu/VirtualFluids_GPU/GPU/GPU_Kernels.cuh
index 84d542e9e7f205c41bc6cb5dab38eb219c1212be..5363a5c4ed3fe8de9dc6c0511d82f6ed04cb855e 100644
--- a/src/gpu/VirtualFluids_GPU/GPU/GPU_Kernels.cuh
+++ b/src/gpu/VirtualFluids_GPU/GPU/GPU_Kernels.cuh
@@ -11,10 +11,12 @@
 //random numbers
 #include <curand.h>
 #include <curand_kernel.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
 
 #include "LBM/LB.h"
 
-extern "C" __global__ void LB_Kernel_Casc27(real s9,
+__global__ void LB_Kernel_Casc27(real s9,
                                             unsigned int* bcMatD,
                                             unsigned int* neighborX,
                                             unsigned int* neighborY,
@@ -23,7 +25,7 @@ extern "C" __global__ void LB_Kernel_Casc27(real s9,
                                             int size_Mat,
                                             bool EvenOrOdd);
 
-extern "C" __global__ void LB_Kernel_Casc_SP_27(  real s9,
+__global__ void LB_Kernel_Casc_SP_27(  real s9,
                                                   unsigned int* bcMatD,
                                                   unsigned int* neighborX,
                                                   unsigned int* neighborY,
@@ -32,7 +34,7 @@ extern "C" __global__ void LB_Kernel_Casc_SP_27(  real s9,
                                                   int size_Mat,
                                                   bool EvenOrOdd);
 
-extern "C" __global__ void LB_Kernel_Casc_SP_MS_27(   real s9,
+__global__ void LB_Kernel_Casc_SP_MS_27(   real s9,
                                                       unsigned int* bcMatD,
                                                       unsigned int* neighborX,
                                                       unsigned int* neighborY,
@@ -41,7 +43,7 @@ extern "C" __global__ void LB_Kernel_Casc_SP_MS_27(   real s9,
                                                       int size_Mat,
                                                       bool EvenOrOdd);
 
-extern "C" __global__ void LB_Kernel_Casc_SP_MS_OHM_27(  real s9,
+__global__ void LB_Kernel_Casc_SP_MS_OHM_27(  real s9,
                                                          unsigned int* bcMatD,
                                                          unsigned int* neighborX,
                                                          unsigned int* neighborY,
@@ -50,7 +52,7 @@ extern "C" __global__ void LB_Kernel_Casc_SP_MS_OHM_27(  real s9,
                                                          int size_Mat,
                                                          bool EvenOrOdd);
 
-extern "C" __global__ void LB_Kernel_Kum_New_Comp_SRT_SP_27(
+__global__ void LB_Kernel_Kum_New_Comp_SRT_SP_27(
 	real omega,
 	unsigned int* bcMatD,
 	unsigned int* neighborX,
@@ -62,7 +64,7 @@ extern "C" __global__ void LB_Kernel_Kum_New_Comp_SRT_SP_27(
 	real* forces,
 	bool EvenOrOdd);
 
-extern "C" __global__ void LB_Kernel_Cumulant_D3Q27All4(real omega,
+__global__ void LB_Kernel_Cumulant_D3Q27All4(real omega,
 														unsigned int* bcMatD,
 														unsigned int* neighborX,
 														unsigned int* neighborY,
@@ -74,7 +76,7 @@ extern "C" __global__ void LB_Kernel_Cumulant_D3Q27All4(real omega,
 														bool EvenOrOdd);
 
 
-extern "C" __global__ void LB_Kernel_Kum_AA2016_Comp_Bulk_SP_27(real omega,
+__global__ void LB_Kernel_Kum_AA2016_Comp_Bulk_SP_27(real omega,
 																unsigned int* bcMatD,
 																unsigned int* neighborX,
 																unsigned int* neighborY,
@@ -87,7 +89,7 @@ extern "C" __global__ void LB_Kernel_Kum_AA2016_Comp_Bulk_SP_27(real omega,
 
 
 
-extern "C" __global__ void LB_Kernel_Kum_1h_SP_27(  real omega,
+__global__ void LB_Kernel_Kum_1h_SP_27(  real omega,
 													real deltaPhi,
 													real angularVelocity,
 													unsigned int* bcMatD,
@@ -101,7 +103,7 @@ extern "C" __global__ void LB_Kernel_Kum_1h_SP_27(  real omega,
 													int size_Mat,
 													bool EvenOrOdd);
 
-extern "C" __global__ void LB_Kernel_Cascade_SP_27( real s9,
+__global__ void LB_Kernel_Cascade_SP_27( real s9,
 													unsigned int* bcMatD,
 													unsigned int* neighborX,
 													unsigned int* neighborY,
@@ -110,7 +112,7 @@ extern "C" __global__ void LB_Kernel_Cascade_SP_27( real s9,
 													int size_Mat,
 													bool EvenOrOdd);
 
-extern "C" __global__ void LB_Kernel_Kum_New_SP_27( real s9,
+__global__ void LB_Kernel_Kum_New_SP_27( real s9,
 													unsigned int* bcMatD,
 													unsigned int* neighborX,
 													unsigned int* neighborY,
@@ -119,7 +121,7 @@ extern "C" __global__ void LB_Kernel_Kum_New_SP_27( real s9,
 													int size_Mat,
 													bool EvenOrOdd);
 
-extern "C" __global__ void LB_Kernel_Kum_IsoTest_SP_27( real omega,
+__global__ void LB_Kernel_Kum_IsoTest_SP_27( real omega,
 														unsigned int* bcMatD,
 														unsigned int* neighborX,
 														unsigned int* neighborY,
@@ -131,7 +133,7 @@ extern "C" __global__ void LB_Kernel_Kum_IsoTest_SP_27( real omega,
 														int size_Mat,
 														bool EvenOrOdd);
 
-extern "C" __global__ void LB_Kernel_Kum_Comp_SP_27(real s9,
+__global__ void LB_Kernel_Kum_Comp_SP_27(real s9,
 													unsigned int* bcMatD,
 													unsigned int* neighborX,
 													unsigned int* neighborY,
@@ -140,7 +142,7 @@ extern "C" __global__ void LB_Kernel_Kum_Comp_SP_27(real s9,
 													int size_Mat,
 													bool EvenOrOdd);
 
-extern "C" __global__ void Cumulant_One_preconditioned_errorDiffusion_chim_Comp_SP_27(
+__global__ void Cumulant_One_preconditioned_errorDiffusion_chim_Comp_SP_27(
 	real omega,
 	unsigned int* bcMatD,
 	unsigned int* neighborX,
@@ -152,7 +154,7 @@ extern "C" __global__ void Cumulant_One_preconditioned_errorDiffusion_chim_Comp_
 	real* forces,
 	bool EvenOrOdd);
 
-extern "C" __global__ void Cumulant_One_preconditioned_chim_Comp_SP_27(
+__global__ void Cumulant_One_preconditioned_chim_Comp_SP_27(
 	real omega,
 	unsigned int* bcMatD,
 	unsigned int* neighborX,
@@ -164,7 +166,7 @@ extern "C" __global__ void Cumulant_One_preconditioned_chim_Comp_SP_27(
 	real* forces,
 	bool EvenOrOdd);
 
-extern "C" __global__ void Cumulant_One_chim_Comp_SP_27(
+__global__ void Cumulant_One_chim_Comp_SP_27(
 	real omega,
 	unsigned int* bcMatD,
 	unsigned int* neighborX,
@@ -186,7 +188,7 @@ inline __device__ void backwardChimeraWithK(real &mfa, real &mfb, real &mfc, rea
 
 
 
-extern "C" __global__ void LB_Kernel_WaleBySoniMalav_Cum_AA2016_Comp_SP_27(
+__global__ void LB_Kernel_WaleBySoniMalav_Cum_AA2016_Comp_SP_27(
 	real omega_in,
 	unsigned int* bcMatD,
 	unsigned int* neighborX,
@@ -204,7 +206,7 @@ extern "C" __global__ void LB_Kernel_WaleBySoniMalav_Cum_AA2016_Comp_SP_27(
 	bool EvenOrOdd);
 
 
-extern "C" __global__ void LB_Kernel_PM_Cum_One_Comp_SP_27( real omega,
+__global__ void LB_Kernel_PM_Cum_One_Comp_SP_27( real omega,
 															unsigned int* neighborX,
 															unsigned int* neighborY,
 															unsigned int* neighborZ,
@@ -219,7 +221,7 @@ extern "C" __global__ void LB_Kernel_PM_Cum_One_Comp_SP_27( real omega,
 															unsigned int* nodeIdsPorousMedia,
 															bool EvenOrOdd);
 
-extern "C" __global__ void LB_Kernel_AD_Incomp_7( real diffusivity,
+__global__ void LB_Kernel_AD_Incomp_7( real diffusivity,
 												  unsigned int* bcMatD,
 												  unsigned int* neighborX,
 												  unsigned int* neighborY,
@@ -229,7 +231,7 @@ extern "C" __global__ void LB_Kernel_AD_Incomp_7( real diffusivity,
 												  int size_Mat,
 												  bool EvenOrOdd);
 
-extern "C" __global__ void LB_Kernel_AD_Incomp_27( real diffusivity,
+__global__ void LB_Kernel_AD_Incomp_27( real diffusivity,
 												   unsigned int* bcMatD,
 												   unsigned int* neighborX,
 												   unsigned int* neighborY,
@@ -239,7 +241,7 @@ extern "C" __global__ void LB_Kernel_AD_Incomp_27( real diffusivity,
 												   int size_Mat,
 												   bool EvenOrOdd);
 
-extern "C" __global__ void LBInit27( int myid,
+__global__ void LBInit27( int myid,
                                      int numprocs,
                                      real u0,
                                      unsigned int* geoD,
@@ -255,7 +257,7 @@ extern "C" __global__ void LBInit27( int myid,
                                      int lev,
                                      int maxlev);
 
-extern "C" __global__ void LBInitNonEqPartSP27(unsigned int* neighborX,
+__global__ void LBInitNonEqPartSP27(unsigned int* neighborX,
                                                unsigned int* neighborY,
                                                unsigned int* neighborZ,
                                                unsigned int* neighborWSB,
@@ -269,7 +271,7 @@ extern "C" __global__ void LBInitNonEqPartSP27(unsigned int* neighborX,
                                                real omega,
                                                bool EvenOrOdd);
 
-extern "C" __global__ void InitAD7( unsigned int* neighborX,
+__global__ void InitAD7( unsigned int* neighborX,
                                        unsigned int* neighborY,
                                        unsigned int* neighborZ,
                                        unsigned int* geoD,
@@ -281,7 +283,7 @@ extern "C" __global__ void InitAD7( unsigned int* neighborX,
                                        real* DD7,
                                        bool EvenOrOdd);
 
-extern "C" __global__ void InitAD27(unsigned int* neighborX,
+__global__ void InitAD27(unsigned int* neighborX,
                                        unsigned int* neighborY,
                                        unsigned int* neighborZ,
                                        unsigned int* geoD,
@@ -293,7 +295,7 @@ extern "C" __global__ void InitAD27(unsigned int* neighborX,
                                        real* DD27,
                                        bool EvenOrOdd);
 
-extern "C" __global__ void LB_PostProcessor_F3_2018_Fehlberg(
+__global__ void LB_PostProcessor_F3_2018_Fehlberg(
 	real omega,
 	unsigned int* bcMatD,
 	unsigned int* neighborX,
@@ -310,7 +312,7 @@ extern "C" __global__ void LB_PostProcessor_F3_2018_Fehlberg(
 	real* forces,
 	bool EvenOrOdd);
 
-extern "C" __global__ void LBCalcMac27( real* vxD,
+__global__ void LBCalcMac27( real* vxD,
                                         real* vyD,
                                         real* vzD,
                                         real* rhoD,
@@ -322,7 +324,7 @@ extern "C" __global__ void LBCalcMac27( real* vxD,
                                         real* DD,
                                         bool isEvenTimestep);
 
-extern "C" __global__ void LBCalcMacSP27( real* vxD,
+__global__ void LBCalcMacSP27( real* vxD,
                                           real* vyD,
                                           real* vzD,
                                           real* rhoD,
@@ -335,7 +337,7 @@ extern "C" __global__ void LBCalcMacSP27( real* vxD,
                                           real* DD,
                                           bool isEvenTimestep);
 
-extern "C" __global__ void LBCalcMacCompSP27( real* vxD,
+__global__ void LBCalcMacCompSP27( real* vxD,
 											  real* vyD,
 											  real* vzD,
 											  real* rhoD,
@@ -348,7 +350,7 @@ extern "C" __global__ void LBCalcMacCompSP27( real* vxD,
 											  real* DD,
 											  bool isEvenTimestep);
 
-extern "C" __global__ void CalcConc7( real* Conc,
+__global__ void CalcConc7( real* Conc,
                                           unsigned int* geoD,
                                           unsigned int* neighborX,
                                           unsigned int* neighborY,
@@ -357,7 +359,7 @@ extern "C" __global__ void CalcConc7( real* Conc,
                                           real* DD7,
                                           bool isEvenTimestep);
 
-extern "C" __global__ void GetPlaneConc7(real* Conc,
+__global__ void GetPlaneConc7(real* Conc,
 								            int* kPC,
 								            unsigned int numberOfPointskPC,
 											unsigned int* geoD,
@@ -368,7 +370,7 @@ extern "C" __global__ void GetPlaneConc7(real* Conc,
 											real* DD7,
 											bool isEvenTimestep);
 
-extern "C" __global__ void GetPlaneConc27(real* Conc,
+__global__ void GetPlaneConc27(real* Conc,
 								             int* kPC,
 								             unsigned int numberOfPointskPC,
 											 unsigned int* geoD,
@@ -379,7 +381,7 @@ extern "C" __global__ void GetPlaneConc27(real* Conc,
 											 real* DD27,
 											 bool isEvenTimestep);
 
-extern "C" __global__ void CalcConc27(real* Conc,
+__global__ void CalcConc27(real* Conc,
                                           unsigned int* geoD,
                                           unsigned int* neighborX,
                                           unsigned int* neighborY,
@@ -388,7 +390,7 @@ extern "C" __global__ void CalcConc27(real* Conc,
                                           real* DD27,
                                           bool isEvenTimestep);
 
-extern "C" __global__ void LBCalcMedSP27( real* vxD,
+__global__ void LBCalcMedSP27( real* vxD,
                                           real* vyD,
                                           real* vzD,
                                           real* rhoD,
@@ -401,7 +403,7 @@ extern "C" __global__ void LBCalcMedSP27( real* vxD,
                                           real* DD,
                                           bool isEvenTimestep);
 
-extern "C" __global__ void LBCalcMedCompSP27( real* vxD,
+__global__ void LBCalcMedCompSP27( real* vxD,
 											  real* vyD,
 											  real* vzD,
 											  real* rhoD,
@@ -414,7 +416,7 @@ extern "C" __global__ void LBCalcMedCompSP27( real* vxD,
 											  real* DD,
 											  bool isEvenTimestep);
 
-extern "C" __global__ void LBCalcMedCompAD27(
+__global__ void LBCalcMedCompAD27(
 	real* vxD,
 	real* vyD,
 	real* vzD,
@@ -430,7 +432,7 @@ extern "C" __global__ void LBCalcMedCompAD27(
 	real* DD_AD,
 	bool isEvenTimestep);
 
-extern "C" __global__ void LBCalcMacMedSP27( real* vxD,
+__global__ void LBCalcMacMedSP27( real* vxD,
                                              real* vyD,
                                              real* vzD,
                                              real* rhoD,
@@ -443,7 +445,7 @@ extern "C" __global__ void LBCalcMacMedSP27( real* vxD,
                                              unsigned int size_Mat,
                                              bool isEvenTimestep);
 
-extern "C" __global__ void LBResetMedianValuesSP27(
+__global__ void LBResetMedianValuesSP27(
 	real* vxD,
 	real* vyD,
 	real* vzD,
@@ -452,7 +454,7 @@ extern "C" __global__ void LBResetMedianValuesSP27(
 	unsigned int size_Mat,
 	bool isEvenTimestep);
 
-extern "C" __global__ void LBResetMedianValuesAD27(
+__global__ void LBResetMedianValuesAD27(
 	real* vxD,
 	real* vyD,
 	real* vzD,
@@ -462,7 +464,7 @@ extern "C" __global__ void LBResetMedianValuesAD27(
 	unsigned int size_Mat,
 	bool isEvenTimestep);
 
-extern "C" __global__ void LBCalc2ndMomentsIncompSP27(  real* kxyFromfcNEQ,
+__global__ void LBCalc2ndMomentsIncompSP27(  real* kxyFromfcNEQ,
 														real* kyzFromfcNEQ,
 														real* kxzFromfcNEQ,
 														real* kxxMyyFromfcNEQ,
@@ -475,7 +477,7 @@ extern "C" __global__ void LBCalc2ndMomentsIncompSP27(  real* kxyFromfcNEQ,
 														real* DD,
 														bool isEvenTimestep);
 
-extern "C" __global__ void LBCalc2ndMomentsCompSP27(real* kxyFromfcNEQ,
+__global__ void LBCalc2ndMomentsCompSP27(real* kxyFromfcNEQ,
 													real* kyzFromfcNEQ,
 													real* kxzFromfcNEQ,
 													real* kxxMyyFromfcNEQ,
@@ -488,7 +490,7 @@ extern "C" __global__ void LBCalc2ndMomentsCompSP27(real* kxyFromfcNEQ,
 													real* DD,
 													bool isEvenTimestep);
 
-extern "C" __global__ void LBCalc3rdMomentsIncompSP27(  real* CUMbbb,
+__global__ void LBCalc3rdMomentsIncompSP27(  real* CUMbbb,
 														real* CUMabc,
 														real* CUMbac,
 														real* CUMbca,
@@ -503,7 +505,7 @@ extern "C" __global__ void LBCalc3rdMomentsIncompSP27(  real* CUMbbb,
 														int size_Mat,
 														bool EvenOrOdd);
 
-extern "C" __global__ void LBCalc3rdMomentsCompSP27(real* CUMbbb,
+__global__ void LBCalc3rdMomentsCompSP27(real* CUMbbb,
 													real* CUMabc,
 													real* CUMbac,
 													real* CUMbca,
@@ -518,7 +520,7 @@ extern "C" __global__ void LBCalc3rdMomentsCompSP27(real* CUMbbb,
 													int size_Mat,
 													bool EvenOrOdd);
 
-extern "C" __global__ void LBCalcHigherMomentsIncompSP27(   real* CUMcbb,
+__global__ void LBCalcHigherMomentsIncompSP27(   real* CUMcbb,
 															real* CUMbcb,
 															real* CUMbbc,
 															real* CUMcca,
@@ -536,7 +538,7 @@ extern "C" __global__ void LBCalcHigherMomentsIncompSP27(   real* CUMcbb,
 															int size_Mat,
 															bool EvenOrOdd);
 
-extern "C" __global__ void LBCalcHigherMomentsCompSP27( real* CUMcbb,
+__global__ void LBCalcHigherMomentsCompSP27( real* CUMcbb,
 														real* CUMbcb,
 														real* CUMbbc,
 														real* CUMcca,
@@ -554,7 +556,7 @@ extern "C" __global__ void LBCalcHigherMomentsCompSP27( real* CUMcbb,
 														int size_Mat,
 														bool EvenOrOdd);
 
-extern "C" __global__ void LBCalcMeasurePoints(real* vxMP,
+__global__ void LBCalcMeasurePoints(real* vxMP,
                                                real* vyMP,
                                                real* vzMP,
                                                real* rhoMP,
@@ -570,7 +572,7 @@ extern "C" __global__ void LBCalcMeasurePoints(real* vxMP,
                                                real* DD,
                                                bool isEvenTimestep);
 
-extern "C" __global__ void LB_BC_Press_East27( int nx,
+__global__ void LB_BC_Press_East27( int nx,
                                                int ny,
                                                int tz,
                                                unsigned int* bcMatD,
@@ -581,7 +583,7 @@ extern "C" __global__ void LB_BC_Press_East27( int nx,
                                                unsigned int size_Mat,
                                                bool isEvenTimestep) ;
 
-extern "C" __global__ void LB_BC_Vel_West_27( int nx,
+__global__ void LB_BC_Vel_West_27( int nx,
                                               int ny,
                                               int nz,
                                               int itz,
@@ -598,7 +600,7 @@ extern "C" __global__ void LB_BC_Vel_West_27( int nx,
                                               real om);
 
 //no Slip BCs
-extern "C" __global__ void QDevice27(real* distributions,
+__global__ void QDevice27(real* distributions,
                                      int* subgridDistanceIndices,
                                      real* subgridDistances,
                                      unsigned int numberOfBCnodes,
@@ -609,7 +611,7 @@ extern "C" __global__ void QDevice27(real* distributions,
                                      unsigned int numberOfLBnodes,
                                      bool isEvenTimestep);
 
-extern "C" __global__ void QDeviceComp27(
+__global__ void QDeviceComp27(
 										 real* distributions,
 										 int* subgridDistanceIndices,
 										 real* subgridDistances,
@@ -621,7 +623,7 @@ extern "C" __global__ void QDeviceComp27(
 										 unsigned int numberOfLBnodes,
 										 bool isEvenTimestep);
 
-extern "C" __global__ void QDeviceCompThinWallsPartOne27(real* DD,
+__global__ void QDeviceCompThinWallsPartOne27(real* DD,
 														 int* k_Q,
 														 real* QQ,
 														 unsigned int numberOfBCnodes,
@@ -632,7 +634,7 @@ extern "C" __global__ void QDeviceCompThinWallsPartOne27(real* DD,
 														 unsigned int size_Mat,
 														 bool isEvenTimestep);
 
-extern "C" __global__ void QDevice3rdMomentsComp27(	 real* distributions, 
+__global__ void QDevice3rdMomentsComp27(	 real* distributions, 
 													 int* subgridDistanceIndices, 
 													 real* subgridDistances,
 													 unsigned int numberOfBCnodes, 
@@ -643,7 +645,7 @@ extern "C" __global__ void QDevice3rdMomentsComp27(	 real* distributions,
 													 unsigned int numberOfLBnodes, 
 													 bool isEvenTimestep);
 
-extern "C" __global__ void QDeviceIncompHighNu27(real* DD,
+__global__ void QDeviceIncompHighNu27(real* DD,
 												 int* k_Q,
 												 real* QQ,
 												 unsigned int numberOfBCnodes,
@@ -651,10 +653,10 @@ extern "C" __global__ void QDeviceIncompHighNu27(real* DD,
 												 unsigned int* neighborX,
 												 unsigned int* neighborY,
 												 unsigned int* neighborZ,
-												 unsigned int size_Mat,
+												 unsigned int numberOfLBnodes,
 												 bool isEvenTimestep);
 
-extern "C" __global__ void QDeviceCompHighNu27(	 real* DD,
+__global__ void QDeviceCompHighNu27(	 real* DD,
 												 int* k_Q,
 												 real* QQ,
 												 unsigned int numberOfBCnodes,
@@ -666,7 +668,7 @@ extern "C" __global__ void QDeviceCompHighNu27(	 real* DD,
 												 bool isEvenTimestep);
 
 //Velocity BCs
-extern "C" __global__ void QVelDevPlainBB27(
+__global__ void QVelDevPlainBB27(
     real* velocityX,
     real* velocityY,
     real* velocityZ,
@@ -680,7 +682,7 @@ extern "C" __global__ void QVelDevPlainBB27(
     uint numberOfLBnodes,
     bool isEvenTimestep);
 
-extern "C" __global__ void QVelDevCouette27(real* vx,
+__global__ void QVelDevCouette27(real* vx,
 											real* vy,
 											real* vz,
 											real* DD,
@@ -694,7 +696,7 @@ extern "C" __global__ void QVelDevCouette27(real* vx,
 											unsigned int size_Mat,
 											bool isEvenTimestep);
 
-extern "C" __global__ void QVelDev1h27( int inx,
+__global__ void QVelDev1h27( int inx,
 										int iny,
 										real* vx,
 										real* vy,
@@ -715,7 +717,7 @@ extern "C" __global__ void QVelDev1h27( int inx,
 										unsigned int size_Mat,
 										bool isEvenTimestep);
 
-extern "C" __global__ void QVelDevice27(int inx,
+__global__ void QVelDevice27(int inx,
                                         int iny,
                                         real* vx,
                                         real* vy,
@@ -731,7 +733,7 @@ extern "C" __global__ void QVelDevice27(int inx,
                                         unsigned int size_Mat,
                                         bool isEvenTimestep);
 
-extern "C" __global__ void QVelDeviceCompPlusSlip27(real* vx,
+__global__ void QVelDeviceCompPlusSlip27(real* vx,
 													real* vy,
 													real* vz,
 													real* DD,
@@ -745,7 +747,7 @@ extern "C" __global__ void QVelDeviceCompPlusSlip27(real* vx,
 													unsigned int size_Mat,
 													bool isEvenTimestep);
 
-extern "C" __global__ void QVelDeviceComp27(real* velocityX,
+__global__ void QVelDeviceComp27(real* velocityX,
 											real* velocityY,
 											real* velocityZ,
 											real* distribution,
@@ -759,7 +761,7 @@ extern "C" __global__ void QVelDeviceComp27(real* velocityX,
 											unsigned int numberOfLBnodes,
 											bool isEvenTimestep);
 
-extern "C" __global__ void QVelDeviceCompThinWallsPartOne27(
+__global__ void QVelDeviceCompThinWallsPartOne27(
 	real* vx,
 	real* vy,
 	real* vz,
@@ -774,7 +776,7 @@ extern "C" __global__ void QVelDeviceCompThinWallsPartOne27(
 	uint size_Mat,
 	bool isEvenTimestep);
 
-extern "C" __global__ void QThinWallsPartTwo27(
+__global__ void QThinWallsPartTwo27(
 	real* DD,
 	int* k_Q,
 	real* QQ,
@@ -787,7 +789,7 @@ extern "C" __global__ void QThinWallsPartTwo27(
 	uint size_Mat,
 	bool isEvenTimestep);
 
-extern "C" __global__ void QVelDeviceCompZeroPress27(
+__global__ void QVelDeviceCompZeroPress27(
 	real* velocityX,
 	real* velocityY,
 	real* velocityZ,
@@ -802,7 +804,7 @@ extern "C" __global__ void QVelDeviceCompZeroPress27(
 	unsigned int numberOfLBnodes,
 	bool isEvenTimestep);
 
-extern "C" __global__ void QVelDeviceIncompHighNu27(real* vx,
+__global__ void QVelDeviceIncompHighNu27(real* vx,
 													real* vy,
 													real* vz,
 													real* DD,
@@ -816,7 +818,7 @@ extern "C" __global__ void QVelDeviceIncompHighNu27(real* vx,
 													unsigned int size_Mat,
 													bool isEvenTimestep);
 
-extern "C" __global__ void QVelDeviceCompHighNu27(	real* vx,
+__global__ void QVelDeviceCompHighNu27(	real* vx,
 													real* vy,
 													real* vz,
 													real* DD,
@@ -830,7 +832,7 @@ extern "C" __global__ void QVelDeviceCompHighNu27(	real* vx,
 													unsigned int size_Mat,
 													bool isEvenTimestep);
 
-extern "C" __global__ void QVeloDeviceEQ27(real* VeloX,
+__global__ void QVeloDeviceEQ27(real* VeloX,
 										   real* VeloY,
 										   real* VeloZ,
                                            real* DD,
@@ -843,7 +845,7 @@ extern "C" __global__ void QVeloDeviceEQ27(real* VeloX,
                                            unsigned int size_Mat,
                                            bool isEvenTimestep);
 
-extern "C" __global__ void QVeloStreetDeviceEQ27(
+__global__ void QVeloStreetDeviceEQ27(
 	real* veloXfraction,
 	real* veloYfraction,
 	int*  naschVelo,
@@ -858,7 +860,7 @@ extern "C" __global__ void QVeloStreetDeviceEQ27(
 	bool  isEvenTimestep);
 
 //Slip BCs
-extern "C" __global__ void QSlipDevice27(real* DD,
+__global__ void QSlipDevice27(real* DD,
                                          int* k_Q,
                                          real* QQ,
                                          unsigned int numberOfBCnodes,
@@ -869,7 +871,7 @@ extern "C" __global__ void QSlipDevice27(real* DD,
                                          unsigned int size_Mat,
                                          bool isEvenTimestep);
 
-extern "C" __global__ void QSlipDeviceComp27(real* DD,
+__global__ void QSlipDeviceComp27(real* DD,
 											 int* k_Q,
 											 real* QQ,
 											 unsigned int numberOfBCnodes,
@@ -880,7 +882,7 @@ extern "C" __global__ void QSlipDeviceComp27(real* DD,
 											 unsigned int size_Mat,
 											 bool isEvenTimestep);
 
-extern "C" __global__ void QSlipDeviceComp27TurbViscosity(real* DD,
+__global__ void QSlipDeviceComp27TurbViscosity(real* DD,
 											 int* k_Q,
 											 real* QQ,
 											 unsigned int numberOfBCnodes,
@@ -892,7 +894,7 @@ extern "C" __global__ void QSlipDeviceComp27TurbViscosity(real* DD,
 											 unsigned int size_Mat,
 											 bool isEvenTimestep);
 
-extern "C" __global__ void QSlipGeomDeviceComp27(real* DD,
+__global__ void QSlipGeomDeviceComp27(real* DD,
 												 int* k_Q,
 												 real* QQ,
 												 unsigned int numberOfBCnodes,
@@ -906,7 +908,7 @@ extern "C" __global__ void QSlipGeomDeviceComp27(real* DD,
 												 unsigned int size_Mat,
 												 bool isEvenTimestep);
 
-extern "C" __global__ void QSlipNormDeviceComp27(real* DD,
+__global__ void QSlipNormDeviceComp27(real* DD,
 												 int* k_Q,
 												 real* QQ,
 												 unsigned int numberOfBCnodes,
@@ -921,7 +923,7 @@ extern "C" __global__ void QSlipNormDeviceComp27(real* DD,
 												 bool isEvenTimestep);
 
 // Stress BCs (wall model)
-extern "C" __global__ void QStressDeviceComp27(real* DD,
+__global__ void QStressDeviceComp27(real* DD,
 											   int* k_Q,
 											 int* k_N,
 											 real* QQ,
@@ -953,7 +955,7 @@ extern "C" __global__ void QStressDeviceComp27(real* DD,
 											 unsigned int size_Mat,
 											 bool isEvenTimestep);
 
-extern "C" __global__ void BBStressDevice27( real* DD,
+__global__ void BBStressDevice27( real* DD,
 												int* k_Q,
 												int* k_N,
 												real* QQ,
@@ -984,7 +986,7 @@ extern "C" __global__ void BBStressDevice27( real* DD,
 												bool isEvenTimestep);
 
 //Pressure BCs
-extern "C" __global__ void QPressDevice27( real* rhoBC,
+__global__ void QPressDevice27( real* rhoBC,
                                            real* DD,
                                            int* k_Q,
                                            real* QQ,
@@ -996,7 +998,7 @@ extern "C" __global__ void QPressDevice27( real* rhoBC,
                                            unsigned int size_Mat,
                                            bool isEvenTimestep);
 
-extern "C" __global__ void QPressDeviceAntiBB27(   real* rhoBC,
+__global__ void QPressDeviceAntiBB27(   real* rhoBC,
 												   real* vx,
 												   real* vy,
 												   real* vz,
@@ -1011,7 +1013,7 @@ extern "C" __global__ void QPressDeviceAntiBB27(   real* rhoBC,
 												   unsigned int size_Mat,
 												   bool isEvenTimestep);
 
-extern "C" __global__ void QPressDeviceFixBackflow27( real* rhoBC,
+__global__ void QPressDeviceFixBackflow27( real* rhoBC,
                                                       real* DD,
                                                       int* k_Q,
                                                       int numberOfBCnodes,
@@ -1022,7 +1024,7 @@ extern "C" __global__ void QPressDeviceFixBackflow27( real* rhoBC,
                                                       unsigned int size_Mat,
                                                       bool isEvenTimestep);
 
-extern "C" __global__ void QPressDeviceDirDepBot27(  real* rhoBC,
+__global__ void QPressDeviceDirDepBot27(  real* rhoBC,
                                                      real* DD,
                                                      int* k_Q,
                                                      int numberOfBCnodes,
@@ -1033,7 +1035,7 @@ extern "C" __global__ void QPressDeviceDirDepBot27(  real* rhoBC,
                                                      unsigned int size_Mat,
                                                      bool isEvenTimestep);
 
-extern "C" __global__ void QPressNoRhoDevice27(  real* rhoBC,
+__global__ void QPressNoRhoDevice27(  real* rhoBC,
 												 real* DD,
 												 int* k_Q,
 												 int* k_N,
@@ -1045,7 +1047,7 @@ extern "C" __global__ void QPressNoRhoDevice27(  real* rhoBC,
 												 unsigned int size_Mat,
 												 bool isEvenTimestep);
 
-extern "C" __global__ void QInflowScaleByPressDevice27(  real* rhoBC,
+__global__ void QInflowScaleByPressDevice27(  real* rhoBC,
 														 real* DD,
 														 int* k_Q,
 														 int* k_N,
@@ -1057,7 +1059,7 @@ extern "C" __global__ void QInflowScaleByPressDevice27(  real* rhoBC,
 														 unsigned int size_Mat,
 														 bool isEvenTimestep);
 
-extern "C" __global__ void QPressDeviceOld27(real* rhoBC,
+__global__ void QPressDeviceOld27(real* rhoBC,
                                              real* DD,
                                              int* k_Q,
                                              int* k_N,
@@ -1069,7 +1071,7 @@ extern "C" __global__ void QPressDeviceOld27(real* rhoBC,
                                              unsigned int size_Mat,
                                              bool isEvenTimestep);
 
-extern "C" __global__ void QPressDeviceIncompNEQ27( real* rhoBC,
+__global__ void QPressDeviceIncompNEQ27( real* rhoBC,
 													real* DD,
 													int* k_Q,
 													int* k_N,
@@ -1081,7 +1083,7 @@ extern "C" __global__ void QPressDeviceIncompNEQ27( real* rhoBC,
 													unsigned int size_Mat,
 													bool isEvenTimestep);
 
-extern "C" __global__ void QPressDeviceNEQ27(real* rhoBC,
+__global__ void QPressDeviceNEQ27(real* rhoBC,
                                              real* distribution,
                                              int* bcNodeIndices,
                                              int* bcNeighborIndices,
@@ -1093,7 +1095,7 @@ extern "C" __global__ void QPressDeviceNEQ27(real* rhoBC,
                                              unsigned int size_Mat,
                                              bool isEvenTimestep);
 
-extern "C" __global__ void QPressDeviceEQZ27(real* rhoBC,
+__global__ void QPressDeviceEQZ27(real* rhoBC,
                                              real* DD,
                                              int* k_Q,
                                              int* k_N,
@@ -1106,7 +1108,7 @@ extern "C" __global__ void QPressDeviceEQZ27(real* rhoBC,
                                              unsigned int size_Mat,
                                              bool isEvenTimestep);
 
-extern "C" __global__ void QPressDeviceZero27(  real* DD,
+__global__ void QPressDeviceZero27(  real* DD,
 												int* k_Q,
 												unsigned int numberOfBCnodes,
 												unsigned int* neighborX,
@@ -1115,7 +1117,7 @@ extern "C" __global__ void QPressDeviceZero27(  real* DD,
 												unsigned int size_Mat,
 												bool isEvenTimestep);
 
-extern "C" __global__ void QPressDeviceFake27(real* rhoBC,
+__global__ void QPressDeviceFake27(real* rhoBC,
                                              real* DD,
                                              int* k_Q,
                                              int* k_N,
@@ -1127,7 +1129,7 @@ extern "C" __global__ void QPressDeviceFake27(real* rhoBC,
                                              unsigned int size_Mat,
                                              bool isEvenTimestep);
 
-extern "C" __global__ void BBDevice27(real* distributions,
+__global__ void BBDevice27(real* distributions,
                                      int* subgridDistanceIndices,
                                      real* subgridDistances,
                                      unsigned int numberOfBCnodes,
@@ -1137,7 +1139,7 @@ extern "C" __global__ void BBDevice27(real* distributions,
                                      unsigned int numberOfLBnodes,
                                      bool isEvenTimestep);
 
-extern "C" __global__ void QPressDevice27_IntBB(real* rho,
+__global__ void QPressDevice27_IntBB(real* rho,
 												real* DD,
 												int* k_Q,
 												real* QQ,
@@ -1151,7 +1153,7 @@ extern "C" __global__ void QPressDevice27_IntBB(real* rho,
 
 // TODO: https://git.rz.tu-bs.de/irmb/VirtualFluids_dev/-/issues/29
 //Schlaffer BCs
-extern "C" __global__ void PressSchlaff27(real* rhoBC,
+__global__ void PressSchlaff27(real* rhoBC,
                                           real* DD,
                                           real* vx0,
                                           real* vy0,
@@ -1168,7 +1170,7 @@ extern "C" __global__ void PressSchlaff27(real* rhoBC,
                                           bool isEvenTimestep);
 
 // TODO: https://git.rz.tu-bs.de/irmb/VirtualFluids_dev/-/issues/29
-extern "C" __global__ void VelSchlaff27(  int t,
+__global__ void VelSchlaff27(  int t,
                                           real* DD,
                                           real* vz0,
                                           real* deltaVz0,
@@ -1183,7 +1185,7 @@ extern "C" __global__ void VelSchlaff27(  int t,
                                           bool isEvenTimestep);
 
 //Advection / Diffusion BCs
-extern "C" __global__ void QAD7( real* DD,
+__global__ void QAD7( real* DD,
                                  real* DD7,
                                  real* temp,
                                  real diffusivity,
@@ -1199,7 +1201,7 @@ extern "C" __global__ void QAD7( real* DD,
 
 //////////////////////////////////////////////////////////////////////////
 //! \brief \ref Advection_Diffusion_Device_Kernel : Factorized central moments for Advection Diffusion Equation
-extern "C" __global__ void Factorized_Central_Moments_Advection_Diffusion_Device_Kernel(
+__global__ void Factorized_Central_Moments_Advection_Diffusion_Device_Kernel(
 	real omegaDiffusivity,
 	uint* typeOfGridNode,
 	uint* neighborX,
@@ -1213,7 +1215,7 @@ extern "C" __global__ void Factorized_Central_Moments_Advection_Diffusion_Device
 
 //////////////////////////////////////////////////////////////////////////
 //! \brief \ref AD_SlipVelDeviceComp : device function for the slip-AD boundary condition
-extern "C" __global__ void AD_SlipVelDeviceComp(
+__global__ void AD_SlipVelDeviceComp(
 	real * normalX,
 	real * normalY,
 	real * normalZ,
@@ -1229,7 +1231,7 @@ extern "C" __global__ void AD_SlipVelDeviceComp(
 	uint size_Mat,
 	bool isEvenTimestep);
 
-extern "C" __global__ void QADDirichlet27(   real* DD,
+__global__ void QADDirichlet27(   real* DD,
 											 real* DD27,
 											 real* temp,
 											 real diffusivity,
@@ -1243,7 +1245,7 @@ extern "C" __global__ void QADDirichlet27(   real* DD,
 											 unsigned int size_Mat,
 											 bool isEvenTimestep);
 
-extern "C" __global__ void QADBB27(  real* DD,
+__global__ void QADBB27(  real* DD,
 									 real* DD27,
 									 real* temp,
 									 real diffusivity,
@@ -1257,7 +1259,7 @@ extern "C" __global__ void QADBB27(  real* DD,
 									 unsigned int size_Mat,
 									 bool isEvenTimestep);
 
-extern "C" __global__ void QADVel7( real* DD,
+__global__ void QADVel7( real* DD,
                                     real* DD7,
                                     real* temp,
                                     real* velo,
@@ -1272,7 +1274,7 @@ extern "C" __global__ void QADVel7( real* DD,
                                     unsigned int size_Mat,
                                     bool isEvenTimestep);
 
-extern "C" __global__ void QADVel27(real* DD,
+__global__ void QADVel27(real* DD,
                                     real* DD27,
                                     real* temp,
                                     real* velo,
@@ -1287,7 +1289,7 @@ extern "C" __global__ void QADVel27(real* DD,
                                     unsigned int size_Mat,
                                     bool isEvenTimestep);
 
-extern "C" __global__ void QADPress7(  real* DD,
+__global__ void QADPress7(  real* DD,
                                        real* DD7,
                                        real* temp,
                                        real* velo,
@@ -1302,7 +1304,7 @@ extern "C" __global__ void QADPress7(  real* DD,
                                        unsigned int size_Mat,
                                        bool isEvenTimestep);
 
-extern "C" __global__ void QADPress27( real* DD,
+__global__ void QADPress27( real* DD,
                                        real* DD27,
                                        real* temp,
                                        real* velo,
@@ -1317,7 +1319,7 @@ extern "C" __global__ void QADPress27( real* DD,
                                        unsigned int size_Mat,
                                        bool isEvenTimestep);
 
-extern "C" __global__ void QADPressNEQNeighbor27(
+__global__ void QADPressNEQNeighbor27(
 												 real* DD,
 												 real* DD27,
 												 int* k_Q,
@@ -1330,7 +1332,7 @@ extern "C" __global__ void QADPressNEQNeighbor27(
 												 bool isEvenTimestep
 												);
 
-extern "C" __global__ void QNoSlipADincomp7( real* DD,
+__global__ void QNoSlipADincomp7( real* DD,
 											 real* DD7,
 											 real* temp,
 											 real diffusivity,
@@ -1344,7 +1346,7 @@ extern "C" __global__ void QNoSlipADincomp7( real* DD,
 											 unsigned int size_Mat,
 											 bool isEvenTimestep);
 
-extern "C" __global__ void QNoSlipADincomp27( real* DD,
+__global__ void QNoSlipADincomp27( real* DD,
 											 real* DD27,
 											 real* temp,
 											 real diffusivity,
@@ -1358,7 +1360,7 @@ extern "C" __global__ void QNoSlipADincomp27( real* DD,
 											 unsigned int size_Mat,
 											 bool isEvenTimestep);
 
-extern "C" __global__ void QADVeloIncomp7(  real* DD,
+__global__ void QADVeloIncomp7(  real* DD,
 											real* DD7,
 											real* temp,
 											real* velo,
@@ -1373,7 +1375,7 @@ extern "C" __global__ void QADVeloIncomp7(  real* DD,
 											unsigned int size_Mat,
 											bool isEvenTimestep);
 
-extern "C" __global__ void QADVeloIncomp27( real* DD,
+__global__ void QADVeloIncomp27( real* DD,
 											real* DD27,
 											real* temp,
 											real* velo,
@@ -1388,7 +1390,7 @@ extern "C" __global__ void QADVeloIncomp27( real* DD,
 											unsigned int size_Mat,
 											bool isEvenTimestep);
 
-extern "C" __global__ void QADPressIncomp7(real* DD,
+__global__ void QADPressIncomp7(real* DD,
 										   real* DD7,
 										   real* temp,
 										   real* velo,
@@ -1403,7 +1405,7 @@ extern "C" __global__ void QADPressIncomp7(real* DD,
 										   unsigned int size_Mat,
 										   bool isEvenTimestep);
 
-extern "C" __global__ void QADPressIncomp27(   real* DD,
+__global__ void QADPressIncomp27(   real* DD,
 											   real* DD27,
 											   real* temp,
 											   real* velo,
@@ -1419,7 +1421,7 @@ extern "C" __global__ void QADPressIncomp27(   real* DD,
 											   bool isEvenTimestep);
 
 //Propeller BC
-extern "C" __global__ void PropellerBC(unsigned int* neighborX,
+__global__ void PropellerBC(unsigned int* neighborX,
                                        unsigned int* neighborY,
                                        unsigned int* neighborZ,
                                        real* rho,
@@ -1436,7 +1438,7 @@ extern "C" __global__ void PropellerBC(unsigned int* neighborX,
 
 
 //coarse to fine
-extern "C" __global__ void scaleCF27(real* DC,
+__global__ void scaleCF27(real* DC,
                                      real* DF,
                                     unsigned int* neighborCX,
                                     unsigned int* neighborCY,
@@ -1458,7 +1460,7 @@ extern "C" __global__ void scaleCF27(real* DC,
 										       unsigned int nxF,
 										       unsigned int nyF);
 
-extern "C" __global__ void scaleCFEff27(real* DC,
+__global__ void scaleCFEff27(real* DC,
                                         real* DF,
                                         unsigned int* neighborCX,
                                         unsigned int* neighborCY,
@@ -1481,7 +1483,7 @@ extern "C" __global__ void scaleCFEff27(real* DC,
                                         unsigned int nyF,
                                         OffCF offCF);
 
-extern "C" __global__ void scaleCFLast27( real* DC,
+__global__ void scaleCFLast27( real* DC,
                                           real* DF,
                                           unsigned int* neighborCX,
                                           unsigned int* neighborCY,
@@ -1504,7 +1506,7 @@ extern "C" __global__ void scaleCFLast27( real* DC,
                                           unsigned int nyF,
                                           OffCF offCF);
 
-extern "C" __global__ void scaleCFpress27(real* DC,
+__global__ void scaleCFpress27(real* DC,
                                           real* DF,
                                           unsigned int* neighborCX,
                                           unsigned int* neighborCY,
@@ -1527,7 +1529,7 @@ extern "C" __global__ void scaleCFpress27(real* DC,
                                           unsigned int nyF,
                                           OffCF offCF);
 
-extern "C" __global__ void scaleCF_Fix_27(real* DC,
+__global__ void scaleCF_Fix_27(real* DC,
                                           real* DF,
                                           unsigned int* neighborCX,
                                           unsigned int* neighborCY,
@@ -1550,7 +1552,7 @@ extern "C" __global__ void scaleCF_Fix_27(real* DC,
                                           unsigned int nyF,
                                           OffCF offCF);
 
-extern "C" __global__ void scaleCF_Fix_comp_27(   real* DC,
+__global__ void scaleCF_Fix_comp_27(   real* DC,
 												  real* DF,
 												  unsigned int* neighborCX,
 												  unsigned int* neighborCY,
@@ -1573,7 +1575,7 @@ extern "C" __global__ void scaleCF_Fix_comp_27(   real* DC,
 												  unsigned int nyF,
 												  OffCF offCF);
 
-extern "C" __global__ void scaleCF_0817_comp_27(  real* DC,
+__global__ void scaleCF_0817_comp_27(  real* DC,
 												  real* DF,
 												  unsigned int* neighborCX,
 												  unsigned int* neighborCY,
@@ -1596,7 +1598,7 @@ extern "C" __global__ void scaleCF_0817_comp_27(  real* DC,
 												  unsigned int nyF,
 												  OffCF offCF);
 
-extern "C" __global__ void scaleCF_comp_D3Q27F3_2018( real* DC,
+__global__ void scaleCF_comp_D3Q27F3_2018( real* DC,
 													  real* DF,
 													  real* G6,
 													  unsigned int* neighborCX,
@@ -1620,7 +1622,7 @@ extern "C" __global__ void scaleCF_comp_D3Q27F3_2018( real* DC,
 													  unsigned int nyF,
 													  OffCF offCF);
 
-extern "C" __global__ void scaleCF_comp_D3Q27F3( real* DC,
+__global__ void scaleCF_comp_D3Q27F3( real* DC,
 												 real* DF,
 												 real* G6,
 												 unsigned int* neighborCX,
@@ -1645,7 +1647,7 @@ extern "C" __global__ void scaleCF_comp_D3Q27F3( real* DC,
 												 OffCF offCF);
 
 
-extern "C" __global__ void scaleCF_staggered_time_comp_27(real* DC,
+__global__ void scaleCF_staggered_time_comp_27(real* DC,
 														  real* DF,
 														  unsigned int* neighborCX,
 														  unsigned int* neighborCY,
@@ -1668,7 +1670,7 @@ extern "C" __global__ void scaleCF_staggered_time_comp_27(real* DC,
 														  unsigned int nyF,
 														  OffCF offCF);
 
-extern "C" __global__ void scaleCF_RhoSq_comp_27( real* DC,
+__global__ void scaleCF_RhoSq_comp_27( real* DC,
 												  real* DF,
 												  unsigned int* neighborCX,
 												  unsigned int* neighborCY,
@@ -1691,7 +1693,7 @@ extern "C" __global__ void scaleCF_RhoSq_comp_27( real* DC,
 												  unsigned int nyF,
 												  OffCF offCF);
 
-extern "C" __global__ void scaleCF_RhoSq_3rdMom_comp_27(real* DC,
+__global__ void scaleCF_RhoSq_3rdMom_comp_27(real* DC,
 														real* DF,
 														unsigned int* neighborCX,
 														unsigned int* neighborCY,
@@ -1714,7 +1716,7 @@ extern "C" __global__ void scaleCF_RhoSq_3rdMom_comp_27(real* DC,
 														unsigned int nyF,
 														OffCF offCF);
 
-extern "C" __global__ void scaleCF_AA2016_comp_27(real* DC,
+__global__ void scaleCF_AA2016_comp_27(real* DC,
 												  real* DF,
 												  unsigned int* neighborCX,
 												  unsigned int* neighborCY,
@@ -1737,7 +1739,7 @@ extern "C" __global__ void scaleCF_AA2016_comp_27(real* DC,
 												  unsigned int nyF,
 												  OffCF offCF);
 
-extern "C" __global__ void scaleCF_NSPress_27(real* DC,
+__global__ void scaleCF_NSPress_27(real* DC,
 											  real* DF,
 											  unsigned int* neighborCX,
 											  unsigned int* neighborCY,
@@ -1760,7 +1762,7 @@ extern "C" __global__ void scaleCF_NSPress_27(real* DC,
 											  unsigned int nyF,
 											  OffCF offCF);
 
-extern "C" __global__ void scaleCFThSMG7( real* DC,
+__global__ void scaleCFThSMG7( real* DC,
                                           real* DF,
                                           real* DD7C,
                                           real* DD7F,
@@ -1780,7 +1782,7 @@ extern "C" __global__ void scaleCFThSMG7( real* DC,
                                           real diffusivity_fine,
                                           OffCF offCF);
 
-extern "C" __global__ void scaleCFThS7(real* DC,
+__global__ void scaleCFThS7(real* DC,
                                        real* DF,
                                        real* DD7C,
                                        real* DD7F,
@@ -1799,7 +1801,7 @@ extern "C" __global__ void scaleCFThS7(real* DC,
                                        real nu,
                                        real diffusivity_fine);
 
-extern "C" __global__ void scaleCFThS27(real* DC,
+__global__ void scaleCFThS27(real* DC,
                                         real* DF,
                                         real* DD27C,
                                         real* DD27F,
@@ -1820,7 +1822,7 @@ extern "C" __global__ void scaleCFThS27(real* DC,
 										OffCF offCF);
 
 //fine to coarse
-extern "C" __global__ void scaleFC27(real* DC,
+__global__ void scaleFC27(real* DC,
                                      real* DF,
                                     unsigned int* neighborCX,
                                     unsigned int* neighborCY,
@@ -1842,7 +1844,7 @@ extern "C" __global__ void scaleFC27(real* DC,
 										       unsigned int nxF,
                                      unsigned int nyF);
 
-extern "C" __global__ void scaleFCEff27(real* DC,
+__global__ void scaleFCEff27(real* DC,
                                         real* DF,
                                         unsigned int* neighborCX,
                                         unsigned int* neighborCY,
@@ -1865,7 +1867,7 @@ extern "C" __global__ void scaleFCEff27(real* DC,
                                         unsigned int nyF,
                                         OffFC offFC);
 
-extern "C" __global__ void scaleFCLast27( real* DC,
+__global__ void scaleFCLast27( real* DC,
                                           real* DF,
                                           unsigned int* neighborCX,
                                           unsigned int* neighborCY,
@@ -1888,7 +1890,7 @@ extern "C" __global__ void scaleFCLast27( real* DC,
                                           unsigned int nyF,
                                           OffFC offFC);
 
-extern "C" __global__ void scaleFCpress27( real* DC,
+__global__ void scaleFCpress27( real* DC,
                                           real* DF,
                                           unsigned int* neighborCX,
                                           unsigned int* neighborCY,
@@ -1911,7 +1913,7 @@ extern "C" __global__ void scaleFCpress27( real* DC,
                                           unsigned int nyF,
                                           OffFC offFC);
 
-extern "C" __global__ void scaleFC_Fix_27( real* DC,
+__global__ void scaleFC_Fix_27( real* DC,
                                           real* DF,
                                           unsigned int* neighborCX,
                                           unsigned int* neighborCY,
@@ -1934,7 +1936,7 @@ extern "C" __global__ void scaleFC_Fix_27( real* DC,
                                           unsigned int nyF,
                                           OffFC offFC);
 
-extern "C" __global__ void scaleFC_Fix_comp_27(   real* DC,
+__global__ void scaleFC_Fix_comp_27(   real* DC,
 												  real* DF,
 												  unsigned int* neighborCX,
 												  unsigned int* neighborCY,
@@ -1957,7 +1959,7 @@ extern "C" __global__ void scaleFC_Fix_comp_27(   real* DC,
 												  unsigned int nyF,
 												  OffFC offFC);
 
-extern "C" __global__ void scaleFC_0817_comp_27(  real* DC,
+__global__ void scaleFC_0817_comp_27(  real* DC,
 												  real* DF,
 												  unsigned int* neighborCX,
 												  unsigned int* neighborCY,
@@ -1980,7 +1982,7 @@ extern "C" __global__ void scaleFC_0817_comp_27(  real* DC,
 												  unsigned int nyF,
 												  OffFC offFC);
 
-extern "C" __global__ void scaleFC_comp_D3Q27F3_2018( real* DC,
+__global__ void scaleFC_comp_D3Q27F3_2018( real* DC,
 													  real* DF,
 													  real* G6,
 													  unsigned int* neighborCX,
@@ -2004,7 +2006,7 @@ extern "C" __global__ void scaleFC_comp_D3Q27F3_2018( real* DC,
 													  unsigned int nyF,
 													  OffFC offFC);
 
-extern "C" __global__ void scaleFC_comp_D3Q27F3( real* DC,
+__global__ void scaleFC_comp_D3Q27F3( real* DC,
 												 real* DF,
 												 real* G6,
 												 unsigned int* neighborCX,
@@ -2029,7 +2031,7 @@ extern "C" __global__ void scaleFC_comp_D3Q27F3( real* DC,
 												 OffFC offFC);
 
 
-extern "C" __global__ void scaleFC_staggered_time_comp_27(real* DC,
+__global__ void scaleFC_staggered_time_comp_27(real* DC,
 														  real* DF,
 														  unsigned int* neighborCX,
 														  unsigned int* neighborCY,
@@ -2052,7 +2054,7 @@ extern "C" __global__ void scaleFC_staggered_time_comp_27(real* DC,
 														  unsigned int nyF,
 														  OffFC offFC);
 
-extern "C" __global__ void scaleFC_RhoSq_comp_27( real* DC,
+__global__ void scaleFC_RhoSq_comp_27( real* DC,
 												  real* DF,
 												  unsigned int* neighborCX,
 												  unsigned int* neighborCY,
@@ -2075,7 +2077,7 @@ extern "C" __global__ void scaleFC_RhoSq_comp_27( real* DC,
 												  unsigned int nyF,
 												  OffFC offFC);
 
-extern "C" __global__ void scaleFC_RhoSq_3rdMom_comp_27(real* DC,
+__global__ void scaleFC_RhoSq_3rdMom_comp_27(real* DC,
 														real* DF,
 														unsigned int* neighborCX,
 														unsigned int* neighborCY,
@@ -2098,7 +2100,7 @@ extern "C" __global__ void scaleFC_RhoSq_3rdMom_comp_27(real* DC,
 														unsigned int nyF,
 														OffFC offFC);
 
-extern "C" __global__ void scaleFC_AA2016_comp_27(real* DC,
+__global__ void scaleFC_AA2016_comp_27(real* DC,
 												  real* DF,
 												  unsigned int* neighborCX,
 												  unsigned int* neighborCY,
@@ -2121,7 +2123,7 @@ extern "C" __global__ void scaleFC_AA2016_comp_27(real* DC,
 												  unsigned int nyF,
 												  OffFC offFC);
 
-extern "C" __global__ void scaleFC_NSPress_27(real* DC,
+__global__ void scaleFC_NSPress_27(real* DC,
 											  real* DF,
 											  unsigned int* neighborCX,
 											  unsigned int* neighborCY,
@@ -2144,7 +2146,7 @@ extern "C" __global__ void scaleFC_NSPress_27(real* DC,
 											  unsigned int nyF,
 											  OffFC offFC);
 
-extern "C" __global__ void scaleFCThSMG7( real* DC,
+__global__ void scaleFCThSMG7( real* DC,
                                           real* DF,
                                           real* DD7C,
                                           real* DD7F,
@@ -2164,7 +2166,7 @@ extern "C" __global__ void scaleFCThSMG7( real* DC,
                                           real diffusivity_coarse,
                                           OffFC offFC);
 
-extern "C" __global__ void scaleFCThS7(real* DC,
+__global__ void scaleFCThS7(real* DC,
                                        real* DF,
                                        real* DD7C,
                                        real* DD7F,
@@ -2183,7 +2185,7 @@ extern "C" __global__ void scaleFCThS7(real* DC,
                                        real nu,
                                        real diffusivity_coarse);
 
-extern "C" __global__ void scaleFCThS27(  real* DC,
+__global__ void scaleFCThS27(  real* DC,
                                           real* DF,
                                           real* DD27C,
                                           real* DD27F,
@@ -2203,7 +2205,7 @@ extern "C" __global__ void scaleFCThS27(  real* DC,
                                           real diffusivity_coarse,
 										  OffFC offFC);
 
-extern "C" __global__ void DragLiftPost27(  real* DD,
+__global__ void DragLiftPost27(  real* DD,
 											int* k_Q,
 											real* QQ,
 											int numberOfBCnodes,
@@ -2216,7 +2218,7 @@ extern "C" __global__ void DragLiftPost27(  real* DD,
 											unsigned int size_Mat,
 											bool isEvenTimestep);
 
-extern "C" __global__ void DragLiftPre27(   real* DD,
+__global__ void DragLiftPre27(   real* DD,
 											int* k_Q,
 											real* QQ,
 											int numberOfBCnodes,
@@ -2229,7 +2231,7 @@ extern "C" __global__ void DragLiftPre27(   real* DD,
 											unsigned int size_Mat,
 											bool isEvenTimestep);
 
-extern "C" __global__ void CalcCP27(real* DD,
+__global__ void CalcCP27(real* DD,
 									int* cpIndex,
 									int nonCp,
 									double *cpPress,
@@ -2239,7 +2241,7 @@ extern "C" __global__ void CalcCP27(real* DD,
 									unsigned int size_Mat,
 									bool isEvenTimestep);
 
-extern "C" __global__ void getSendFsPre27(real* DD,
+__global__ void getSendFsPre27(real* DD,
 										  real* bufferFs,
 										  int* sendIndex,
                                           int buffmax,
@@ -2249,7 +2251,7 @@ extern "C" __global__ void getSendFsPre27(real* DD,
                                           unsigned int size_Mat,
                                           bool isEvenTimestep);
 
-extern "C" __global__ void getSendFsPost27(real* DD,
+__global__ void getSendFsPost27(real* DD,
 										   real* bufferFs,
 										   int* sendIndex,
                                            int buffmax,
@@ -2259,7 +2261,7 @@ extern "C" __global__ void getSendFsPost27(real* DD,
                                            unsigned int size_Mat,
                                            bool isEvenTimestep);
 
-extern "C" __global__ void setRecvFsPre27(real* DD,
+__global__ void setRecvFsPre27(real* DD,
 										  real* bufferFs,
 										  int* recvIndex,
                                           int buffmax,
@@ -2269,7 +2271,7 @@ extern "C" __global__ void setRecvFsPre27(real* DD,
                                           unsigned int size_Mat,
                                           bool isEvenTimestep);
 
-extern "C" __global__ void setRecvFsPost27(real* DD,
+__global__ void setRecvFsPost27(real* DD,
 										   real* bufferFs,
 										   int* recvIndex,
                                            int buffmax,
@@ -2279,7 +2281,7 @@ extern "C" __global__ void setRecvFsPost27(real* DD,
                                            unsigned int size_Mat,
                                            bool isEvenTimestep);
 
-extern "C" __global__ void getSendGsF3(
+__global__ void getSendGsF3(
 	real* G6,
 	real* bufferGs,
 	int* sendIndex,
@@ -2290,7 +2292,7 @@ extern "C" __global__ void getSendGsF3(
 	unsigned int size_Mat,
 	bool isEvenTimestep);
 
-extern "C" __global__ void setRecvGsF3(
+__global__ void setRecvGsF3(
 	real* G6,
 	real* bufferGs,
 	int* recvIndex,
@@ -2301,7 +2303,7 @@ extern "C" __global__ void setRecvGsF3(
 	unsigned int size_Mat,
 	bool isEvenTimestep);
 
-extern "C" __global__ void WallFunction27( 	real* vx,
+__global__ void WallFunction27( 	real* vx,
 											real* vy,
 											real* vz,
 											real* DD,
@@ -2315,7 +2317,7 @@ extern "C" __global__ void WallFunction27( 	real* vx,
 											unsigned int size_Mat,
 											bool isEvenTimestep);
 
-extern "C" __global__ void LBSetOutputWallVelocitySP27( real* vxD,
+__global__ void LBSetOutputWallVelocitySP27( real* vxD,
 														real* vyD,
 														real* vzD,
 														real* vxWall,
@@ -2333,7 +2335,7 @@ extern "C" __global__ void LBSetOutputWallVelocitySP27( real* vxD,
 														real* DD,
 														bool isEvenTimestep);
 
-extern "C" __global__ void GetVeloforForcing27( real* DD,
+__global__ void GetVeloforForcing27( real* DD,
 												int* bcIndex,
 												int nonAtBC,
 												real* Vx,
@@ -2345,7 +2347,7 @@ extern "C" __global__ void GetVeloforForcing27( real* DD,
 												unsigned int size_Mat,
 												bool isEvenTimestep);
 
-extern "C" __global__ void InitParticles( real* coordX,
+__global__ void InitParticles( real* coordX,
 										  real* coordY,
 										  real* coordZ,
 										  real* coordParticleXlocal,
@@ -2369,7 +2371,7 @@ extern "C" __global__ void InitParticles( real* coordX,
 									      unsigned int numberOfParticles,
 										  unsigned int size_Mat);
 
-extern "C" __global__ void MoveParticles( real* coordX,
+__global__ void MoveParticles( real* coordX,
 										  real* coordY,
 										  real* coordZ,
 										  real* coordParticleXlocal,
@@ -2397,7 +2399,7 @@ extern "C" __global__ void MoveParticles( real* coordX,
 										  unsigned int size_Mat,
 										  bool isEvenTimestep);
 
-extern "C" __global__ void MoveParticlesWithoutBCs(   real* coordX,
+__global__ void MoveParticlesWithoutBCs(   real* coordX,
 													  real* coordY,
 													  real* coordZ,
 													  real* coordParticleXlocal,
@@ -2425,12 +2427,12 @@ extern "C" __global__ void MoveParticlesWithoutBCs(   real* coordX,
 													  unsigned int size_Mat,
 													  bool isEvenTimestep);
 
-extern "C" __global__ void initRandom(curandState* state);
+__global__ void initRandom(curandState* state);
 
-extern "C" __global__ void generateRandomValues(curandState* state,
+__global__ void generateRandomValues(curandState* state,
 												real* randArray);
 
-extern "C" __global__ void CalcTurbulenceIntensity(
+__global__ void CalcTurbulenceIntensity(
    real* vxx,
    real* vyy,
    real* vzz,
diff --git a/src/gpu/VirtualFluids_GPU/GPU/Init27.cu b/src/gpu/VirtualFluids_GPU/GPU/Init27.cu
index b27df37882b684e3fc8cf3b09e39a6195baed5de..d3f9349c9b90ea6103a265b5d3f4fb059fd4947e 100644
--- a/src/gpu/VirtualFluids_GPU/GPU/Init27.cu
+++ b/src/gpu/VirtualFluids_GPU/GPU/Init27.cu
@@ -7,7 +7,7 @@ using namespace vf::lbm::constant;
 using namespace vf::lbm::dir;
 
 ////////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void LBInit27( int myid,
+__global__ void LBInit27( int myid,
                                      int numprocs,
                                      real u0,
                                      unsigned int* geoD,
@@ -182,7 +182,7 @@ extern "C" __global__ void LBInit27( int myid,
 
 
 ////////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void LBInitNonEqPartSP27( unsigned int* neighborX,
+__global__ void LBInitNonEqPartSP27( unsigned int* neighborX,
                                                 unsigned int* neighborY,
                                                 unsigned int* neighborZ,
                                                 unsigned int* neighborWSB,
diff --git a/src/gpu/VirtualFluids_GPU/GPU/InitAdvectionDiffusion27.cu b/src/gpu/VirtualFluids_GPU/GPU/InitAdvectionDiffusion27.cu
index 38e409730a8316f9f8b620d931d25aadc80d4b01..be2c18ae5dd32d00c28efbce23907d65aca3b39b 100644
--- a/src/gpu/VirtualFluids_GPU/GPU/InitAdvectionDiffusion27.cu
+++ b/src/gpu/VirtualFluids_GPU/GPU/InitAdvectionDiffusion27.cu
@@ -38,7 +38,7 @@
 using namespace vf::lbm::constant;
 using namespace vf::lbm::dir;
 
-extern "C" __global__ void InitAD27(
+__global__ void InitAD27(
 	uint* neighborX,
 	uint* neighborY,
 	uint* neighborZ,
@@ -229,7 +229,7 @@ extern "C" __global__ void InitAD27(
 // DEPRECATED (2022)
 
 // ////////////////////////////////////////////////////////////////////////////////
-// extern "C" __global__ void InitAD27(unsigned int* neighborX,
+// __global__ void InitAD27(unsigned int* neighborX,
 //                                        unsigned int* neighborY,
 //                                        unsigned int* neighborZ,
 //                                        unsigned int* geoD,
@@ -440,7 +440,7 @@ extern "C" __global__ void InitAD27(
 
 
 ////////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void InitAD7( unsigned int* neighborX,
+__global__ void InitAD7( unsigned int* neighborX,
                                     unsigned int* neighborY,
                                     unsigned int* neighborZ,
                                     unsigned int* geoD,
diff --git a/src/gpu/VirtualFluids_GPU/GPU/LBMKernel.cu b/src/gpu/VirtualFluids_GPU/GPU/LBMKernel.cu
index 884fa94781e6a120205de7b0b4a8a302ce74dc62..316a49c420eaf76dcee49e23591bf166d1859f2e 100644
--- a/src/gpu/VirtualFluids_GPU/GPU/LBMKernel.cu
+++ b/src/gpu/VirtualFluids_GPU/GPU/LBMKernel.cu
@@ -18,7 +18,7 @@
 
 #include "Parameter/Parameter.h"
 //////////////////////////////////////////////////////////////////////////
-extern "C" void KernelCas27( unsigned int grid_nx,
+void KernelCas27( unsigned int grid_nx,
                              unsigned int grid_ny,
                              unsigned int grid_nz,
                              real s9,
@@ -44,7 +44,7 @@ extern "C" void KernelCas27( unsigned int grid_nx,
      getLastCudaError("LB_Kernel_Casc27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void KernelCasSP27( unsigned int numberOfThreads,
+void KernelCasSP27( unsigned int numberOfThreads,
                                real s9,
                                unsigned int* bcMatD,
                                unsigned int* neighborX,
@@ -80,7 +80,7 @@ extern "C" void KernelCasSP27( unsigned int numberOfThreads,
       getLastCudaError("LB_Kernel_Casc_SP_27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void KernelCasSPMS27( unsigned int numberOfThreads,
+void KernelCasSPMS27( unsigned int numberOfThreads,
                                  real s9,
                                  unsigned int* bcMatD,
                                  unsigned int* neighborX,
@@ -116,7 +116,7 @@ extern "C" void KernelCasSPMS27( unsigned int numberOfThreads,
       getLastCudaError("LB_Kernel_Casc_SP_MS_27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void KernelCasSPMSOHM27( unsigned int numberOfThreads,
+void KernelCasSPMSOHM27( unsigned int numberOfThreads,
                                     real s9,
                                     unsigned int* bcMatD,
                                     unsigned int* neighborX,
@@ -152,7 +152,7 @@ extern "C" void KernelCasSPMSOHM27( unsigned int numberOfThreads,
       getLastCudaError("LB_Kernel_Casc_SP_MS_OHM_27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void KernelKumCompSRTSP27(
+void KernelKumCompSRTSP27(
 	unsigned int numberOfThreads,
 	real omega,
 	unsigned int* bcMatD,
@@ -194,7 +194,7 @@ extern "C" void KernelKumCompSRTSP27(
       getLastCudaError("LB_Kernel_Kum_New_Comp_SRT_SP_27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void KernelKum1hSP27(    unsigned int numberOfThreads,
+void KernelKum1hSP27(    unsigned int numberOfThreads,
 									real omega,
 									real deltaPhi,
 									real angularVelocity,
@@ -240,7 +240,7 @@ extern "C" void KernelKum1hSP27(    unsigned int numberOfThreads,
 		getLastCudaError("LB_Kernel_Kum_New_SP_27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void KernelCascadeSP27(  unsigned int numberOfThreads,
+void KernelCascadeSP27(  unsigned int numberOfThreads,
 									real s9,
 									unsigned int* bcMatD,
 									unsigned int* neighborX,
@@ -276,7 +276,7 @@ extern "C" void KernelCascadeSP27(  unsigned int numberOfThreads,
 		getLastCudaError("LB_Kernel_Cascade_SP_27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void KernelKumNewSP27(   unsigned int numberOfThreads,
+void KernelKumNewSP27(   unsigned int numberOfThreads,
 									real s9,
 									unsigned int* bcMatD,
 									unsigned int* neighborX,
@@ -312,7 +312,7 @@ extern "C" void KernelKumNewSP27(   unsigned int numberOfThreads,
 		getLastCudaError("LB_Kernel_Kum_New_SP_27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void KernelKumNewCompSP27(unsigned int numberOfThreads,
+void KernelKumNewCompSP27(unsigned int numberOfThreads,
 									real s9,
 									unsigned int* bcMatD,
 									unsigned int* neighborX,
@@ -358,7 +358,7 @@ extern "C" void KernelKumNewCompSP27(unsigned int numberOfThreads,
 }
 
 //////////////////////////////////////////////////////////////////////////
-extern "C" void CumulantOnePreconditionedErrorDiffusionChimCompSP27(unsigned int numberOfThreads,
+void CumulantOnePreconditionedErrorDiffusionChimCompSP27(unsigned int numberOfThreads,
 																	real s9,
 																	unsigned int* bcMatD,
 																	unsigned int* neighborX,
@@ -403,7 +403,7 @@ extern "C" void CumulantOnePreconditionedErrorDiffusionChimCompSP27(unsigned int
 		getLastCudaError("Cumulant_One_preconditioned_chim_Comp_SP_27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void CumulantOnePreconditionedChimCompSP27(  unsigned int numberOfThreads,
+void CumulantOnePreconditionedChimCompSP27(  unsigned int numberOfThreads,
 														real s9,
 														unsigned int* bcMatD,
 														unsigned int* neighborX,
@@ -448,7 +448,7 @@ extern "C" void CumulantOnePreconditionedChimCompSP27(  unsigned int numberOfThr
 		getLastCudaError("Cumulant_One_preconditioned_chim_Comp_SP_27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void CumulantOneChimCompSP27(unsigned int numberOfThreads,
+void CumulantOneChimCompSP27(unsigned int numberOfThreads,
 										real s9,
 										unsigned int* bcMatD,
 										unsigned int* neighborX,
@@ -493,7 +493,7 @@ extern "C" void CumulantOneChimCompSP27(unsigned int numberOfThreads,
 		getLastCudaError("Cumulant_One_chim_Comp_SP_27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void KernelKumIsoTestSP27(unsigned int numberOfThreads,
+void KernelKumIsoTestSP27(unsigned int numberOfThreads,
 									 real s9,
 									 unsigned int* bcMatD,
 									 unsigned int* neighborX,
@@ -535,7 +535,7 @@ extern "C" void KernelKumIsoTestSP27(unsigned int numberOfThreads,
 	getLastCudaError("LB_Kernel_Kum_IsoTest_SP_27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void KernelKumCompSP27(  unsigned int numberOfThreads,
+void KernelKumCompSP27(  unsigned int numberOfThreads,
 									real s9,
 									unsigned int* bcMatD,
 									unsigned int* neighborX,
@@ -571,7 +571,7 @@ extern "C" void KernelKumCompSP27(  unsigned int numberOfThreads,
 		getLastCudaError("LB_Kernel_Kum_Comp_SP_27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void KernelPMCumOneCompSP27(unsigned int numberOfThreads,
+void KernelPMCumOneCompSP27(unsigned int numberOfThreads,
 									   real omega,
 									   unsigned int* neighborX,
 									   unsigned int* neighborY,
@@ -619,7 +619,7 @@ extern "C" void KernelPMCumOneCompSP27(unsigned int numberOfThreads,
 	getLastCudaError("LB_Kernel_PM_Cum_One_Comp_SP_27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void KernelWaleBySoniMalavCumAA2016CompSP27(
+void KernelWaleBySoniMalavCumAA2016CompSP27(
 	unsigned int numberOfThreads,
 	real s9,
 	unsigned int* bcMatD,
@@ -676,7 +676,7 @@ extern "C" void KernelWaleBySoniMalavCumAA2016CompSP27(
 	getLastCudaError("LB_Kernel_WaleBySoniMalav_Cum_AA2016_Comp_SP_27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void KernelADincomp7(   unsigned int numberOfThreads,
+void KernelADincomp7(   unsigned int numberOfThreads,
 								   real diffusivity,
 								   unsigned int* bcMatD,
 								   unsigned int* neighborX,
@@ -714,7 +714,7 @@ extern "C" void KernelADincomp7(   unsigned int numberOfThreads,
       getLastCudaError("LB_Kernel_AD_Incomp_7 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void KernelADincomp27( unsigned int numberOfThreads,
+void KernelADincomp27( unsigned int numberOfThreads,
 								  real diffusivity,
 								  unsigned int* bcMatD,
 								  unsigned int* neighborX,
@@ -752,7 +752,7 @@ extern "C" void KernelADincomp27( unsigned int numberOfThreads,
       getLastCudaError("LB_Kernel_AD_Incomp_27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void Init27( int myid,
+void Init27( int myid,
                         int numprocs,
                         real u0,
                         unsigned int* geoD,
@@ -789,7 +789,7 @@ extern "C" void Init27( int myid,
       getLastCudaError("LBInit27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void InitNonEqPartSP27( unsigned int numberOfThreads,
+void InitNonEqPartSP27( unsigned int numberOfThreads,
                                    unsigned int* neighborX,
                                    unsigned int* neighborY,
                                    unsigned int* neighborZ,
@@ -835,7 +835,7 @@ extern "C" void InitNonEqPartSP27( unsigned int numberOfThreads,
       getLastCudaError("LBInitNonEqPartSP27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void InitThS7(     unsigned int numberOfThreads,
+void InitThS7(     unsigned int numberOfThreads,
                               unsigned int* neighborX,
                               unsigned int* neighborY,
                               unsigned int* neighborZ,
@@ -877,7 +877,7 @@ extern "C" void InitThS7(     unsigned int numberOfThreads,
       getLastCudaError("InitAD7 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void InitADDev27( unsigned int numberOfThreads,
+void InitADDev27( unsigned int numberOfThreads,
                            unsigned int* neighborX,
                            unsigned int* neighborY,
                            unsigned int* neighborZ,
@@ -919,7 +919,7 @@ extern "C" void InitADDev27( unsigned int numberOfThreads,
       getLastCudaError("InitAD27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void PostProcessorF3_2018Fehlberg(
+void PostProcessorF3_2018Fehlberg(
 	unsigned int numberOfThreads,
 	real omega,
 	unsigned int* bcMatD,
@@ -970,7 +970,7 @@ extern "C" void PostProcessorF3_2018Fehlberg(
       getLastCudaError("LB_PostProcessor_F3_2018_Fehlberg execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void CalcMac27( real* vxD,
+void CalcMac27( real* vxD,
                            real* vyD,
                            real* vzD,
                            real* rhoD,
@@ -1002,7 +1002,7 @@ extern "C" void CalcMac27( real* vxD,
       getLastCudaError("LBCalcMac27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void CalcMacSP27( real* vxD,
+void CalcMacSP27( real* vxD,
                              real* vyD,
                              real* vzD,
                              real* rhoD,
@@ -1046,7 +1046,7 @@ extern "C" void CalcMacSP27( real* vxD,
       getLastCudaError("LBCalcMacSP27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void CalcMacCompSP27( real* vxD,
+void CalcMacCompSP27( real* vxD,
 								 real* vyD,
 								 real* vzD,
 								 real* rhoD,
@@ -1090,7 +1090,7 @@ extern "C" void CalcMacCompSP27( real* vxD,
       getLastCudaError("LBCalcMacSP27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void CalcMacThS7(  real* Conc,
+void CalcMacThS7(  real* Conc,
                               unsigned int* geoD,
                               unsigned int* neighborX,
                               unsigned int* neighborY,
@@ -1126,7 +1126,7 @@ extern "C" void CalcMacThS7(  real* Conc,
       getLastCudaError("CalcConc7 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void PlaneConcThS7(real* Conc,
+void PlaneConcThS7(real* Conc,
 							  int* kPC,
 							  unsigned int numberOfPointskPC,
 							  unsigned int* geoD,
@@ -1166,7 +1166,7 @@ extern "C" void PlaneConcThS7(real* Conc,
       getLastCudaError("GetPlaneConc7 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void PlaneConcThS27(real* Conc,
+void PlaneConcThS27(real* Conc,
 							   int* kPC,
 							   unsigned int numberOfPointskPC,
 							   unsigned int* geoD,
@@ -1206,7 +1206,7 @@ extern "C" void PlaneConcThS27(real* Conc,
       getLastCudaError("GetPlaneConc27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void CalcConcentration27( unsigned int numberOfThreads,
+void CalcConcentration27( unsigned int numberOfThreads,
                                      real* Conc,
                                      unsigned int* geoD,
                                      unsigned int* neighborX,
@@ -1242,7 +1242,7 @@ extern "C" void CalcConcentration27( unsigned int numberOfThreads,
       getLastCudaError("CalcConc27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void CalcMedSP27(  real* vxD,
+void CalcMedSP27(  real* vxD,
                               real* vyD,
                               real* vzD,
                               real* rhoD,
@@ -1286,7 +1286,7 @@ extern "C" void CalcMedSP27(  real* vxD,
       getLastCudaError("LBCalcMedSP27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void CalcMedCompSP27(  real* vxD,
+void CalcMedCompSP27(  real* vxD,
 								  real* vyD,
 								  real* vzD,
 								  real* rhoD,
@@ -1330,7 +1330,7 @@ extern "C" void CalcMedCompSP27(  real* vxD,
       getLastCudaError("LBCalcMedSP27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void CalcMedCompAD27(
+void CalcMedCompAD27(
 	real* vxD,
 	real* vyD,
 	real* vzD,
@@ -1380,7 +1380,7 @@ extern "C" void CalcMedCompAD27(
 	getLastCudaError("LBCalcMedAD27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void CalcMacMedSP27(  real* vxD,
+void CalcMacMedSP27(  real* vxD,
                                  real* vyD,
                                  real* vzD,
                                  real* rhoD,
@@ -1424,7 +1424,7 @@ extern "C" void CalcMacMedSP27(  real* vxD,
       getLastCudaError("LBCalcMacMedSP27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void ResetMedianValuesSP27(
+void ResetMedianValuesSP27(
 	real* vxD,
 	real* vyD,
 	real* vzD,
@@ -1460,7 +1460,7 @@ extern "C" void ResetMedianValuesSP27(
 	getLastCudaError("LBResetMedianValuesSP27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void ResetMedianValuesAD27(
+void ResetMedianValuesAD27(
 	real* vxD,
 	real* vyD,
 	real* vzD,
@@ -1498,7 +1498,7 @@ extern "C" void ResetMedianValuesAD27(
 	getLastCudaError("LBResetMedianValuesAD27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void Calc2ndMomentsIncompSP27(real* kxyFromfcNEQ,
+void Calc2ndMomentsIncompSP27(real* kxyFromfcNEQ,
 										 real* kyzFromfcNEQ,
 										 real* kxzFromfcNEQ,
 										 real* kxxMyyFromfcNEQ,
@@ -1542,7 +1542,7 @@ extern "C" void Calc2ndMomentsIncompSP27(real* kxyFromfcNEQ,
       getLastCudaError("LBCalc2ndMomentsIncompSP27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void Calc2ndMomentsCompSP27( real* kxyFromfcNEQ,
+void Calc2ndMomentsCompSP27( real* kxyFromfcNEQ,
 										real* kyzFromfcNEQ,
 										real* kxzFromfcNEQ,
 										real* kxxMyyFromfcNEQ,
@@ -1586,7 +1586,7 @@ extern "C" void Calc2ndMomentsCompSP27( real* kxyFromfcNEQ,
       getLastCudaError("LBCalc2ndMomentsCompSP27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void Calc3rdMomentsIncompSP27(real* CUMbbb,
+void Calc3rdMomentsIncompSP27(real* CUMbbb,
 										 real* CUMabc,
 										 real* CUMbac,
 										 real* CUMbca,
@@ -1634,7 +1634,7 @@ extern "C" void Calc3rdMomentsIncompSP27(real* CUMbbb,
       getLastCudaError("LBCalc3rdMomentsIncompSP27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void Calc3rdMomentsCompSP27( real* CUMbbb,
+void Calc3rdMomentsCompSP27( real* CUMbbb,
 										real* CUMabc,
 										real* CUMbac,
 										real* CUMbca,
@@ -1682,7 +1682,7 @@ extern "C" void Calc3rdMomentsCompSP27( real* CUMbbb,
       getLastCudaError("LBCalc3rdMomentsCompSP27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void CalcHigherMomentsIncompSP27(real* CUMcbb,
+void CalcHigherMomentsIncompSP27(real* CUMcbb,
 											real* CUMbcb,
 											real* CUMbbc,
 											real* CUMcca,
@@ -1736,7 +1736,7 @@ extern "C" void CalcHigherMomentsIncompSP27(real* CUMcbb,
       getLastCudaError("LBCalcHigherMomentsIncompSP27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void CalcHigherMomentsCompSP27(  real* CUMcbb,
+void CalcHigherMomentsCompSP27(  real* CUMcbb,
 											real* CUMbcb,
 											real* CUMbbc,
 											real* CUMcca,
@@ -1790,7 +1790,7 @@ extern "C" void CalcHigherMomentsCompSP27(  real* CUMcbb,
       getLastCudaError("LBCalcHigherMomentsCompSP27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void LBCalcMeasurePoints27(real* vxMP,
+void LBCalcMeasurePoints27(real* vxMP,
                                       real* vyMP,
                                       real* vzMP,
                                       real* rhoMP,
@@ -1840,7 +1840,7 @@ extern "C" void LBCalcMeasurePoints27(real* vxMP,
       getLastCudaError("LBCalcMeasurePoints execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void BcPress27( int nx,
+void BcPress27( int nx,
                            int ny,
                            int tz,
                            unsigned int grid_nx,
@@ -1869,7 +1869,7 @@ extern "C" void BcPress27( int nx,
       getLastCudaError("LB_BC_Press_East27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void BcVel27(int nx,
+void BcVel27(int nx,
                         int ny,
                         int nz,
                         int itz,
@@ -1906,9 +1906,7 @@ extern "C" void BcVel27(int nx,
       getLastCudaError("LB_BC_Vel_West_27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void QADPressDev7( unsigned int numberOfThreads,
-                              int nx,
-                              int ny,
+void QADPressDev7( unsigned int numberOfThreads,
                               real* DD,
                               real* DD7,
                               real* temp,
@@ -1956,7 +1954,7 @@ extern "C" void QADPressDev7( unsigned int numberOfThreads,
       getLastCudaError("QADPress7 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void QADPressDev27(unsigned int numberOfThreads,
+void QADPressDev27(unsigned int numberOfThreads,
                               real* DD,
                               real* DD27,
                               real* temp,
@@ -2004,7 +2002,7 @@ extern "C" void QADPressDev27(unsigned int numberOfThreads,
       getLastCudaError("QADPress27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void QADPressNEQNeighborDev27(
+void QADPressNEQNeighborDev27(
 											unsigned int numberOfThreads,
 											real* DD,
 											real* DD27,
@@ -2049,7 +2047,7 @@ extern "C" void QADPressNEQNeighborDev27(
    getLastCudaError("QADPressNEQNeighbor27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void QADVelDev7(unsigned int numberOfThreads,
+void QADVelDev7(unsigned int numberOfThreads,
                            real* DD,
                            real* DD7,
                            real* temp,
@@ -2098,7 +2096,7 @@ extern "C" void QADVelDev7(unsigned int numberOfThreads,
       getLastCudaError("QADVel7 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void QADVelDev27(  unsigned int numberOfThreads,
+void QADVelDev27(  unsigned int numberOfThreads,
                               real* DD,
                               real* DD27,
                               real* temp,
@@ -2146,7 +2144,7 @@ extern "C" void QADVelDev27(  unsigned int numberOfThreads,
       getLastCudaError("QADVel27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void QADDev7(unsigned int numberOfThreads,
+void QADDev7(unsigned int numberOfThreads,
                         real* DD,
                         real* DD7,
                         real* temp,
@@ -2195,7 +2193,7 @@ extern "C" void QADDev7(unsigned int numberOfThreads,
 
 //////////////////////////////////////////////////////////////////////////
 // Other advection diffusion kernels are in kernel factory :(
-extern "C" void FactorizedCentralMomentsAdvectionDiffusionDeviceKernel(
+void FactorizedCentralMomentsAdvectionDiffusionDeviceKernel(
    uint numberOfThreads,
    real omegaDiffusivity,
    uint* typeOfGridNode,
@@ -2227,7 +2225,7 @@ extern "C" void FactorizedCentralMomentsAdvectionDiffusionDeviceKernel(
 }
 
 //////////////////////////////////////////////////////////////////////////
-extern "C" void ADSlipVelDevComp(
+void ADSlipVelDevComp(
 	uint numberOfThreads,
 	real * normalX,
 	real * normalY,
@@ -2267,7 +2265,7 @@ extern "C" void ADSlipVelDevComp(
 }
 //////////////////////////////////////////////////////////////////////////
 
-extern "C" void QADDirichletDev27( unsigned int numberOfThreads,
+void QADDirichletDev27( unsigned int numberOfThreads,
 								   real* DD,
 								   real* DD27,
 								   real* temp,
@@ -2314,7 +2312,7 @@ extern "C" void QADDirichletDev27( unsigned int numberOfThreads,
       getLastCudaError("QADDirichletDev27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void QADBBDev27(unsigned int numberOfThreads,
+void QADBBDev27(unsigned int numberOfThreads,
                            real* DD,
                            real* DD27,
                            real* temp,
@@ -2360,7 +2358,7 @@ extern "C" void QADBBDev27(unsigned int numberOfThreads,
       getLastCudaError("QADBB27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void QNoSlipADincompDev7(unsigned int numberOfThreads,
+void QNoSlipADincompDev7(unsigned int numberOfThreads,
 									real* DD,
 									real* DD7,
 									real* temp,
@@ -2407,7 +2405,7 @@ extern "C" void QNoSlipADincompDev7(unsigned int numberOfThreads,
       getLastCudaError("QNoSlipADincomp7 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void QNoSlipADincompDev27(  unsigned int numberOfThreads,
+void QNoSlipADincompDev27(  unsigned int numberOfThreads,
 									   real* DD,
 									   real* DD27,
 									   real* temp,
@@ -2454,7 +2452,7 @@ extern "C" void QNoSlipADincompDev27(  unsigned int numberOfThreads,
       getLastCudaError("QNoSlipADincomp27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void QADVeloIncompDev7( unsigned int numberOfThreads,
+void QADVeloIncompDev7( unsigned int numberOfThreads,
 								   real* DD,
 								   real* DD7,
 								   real* temp,
@@ -2503,7 +2501,7 @@ extern "C" void QADVeloIncompDev7( unsigned int numberOfThreads,
       getLastCudaError("QADVeloIncomp7 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void QADVeloIncompDev27(   unsigned int numberOfThreads,
+void QADVeloIncompDev27(   unsigned int numberOfThreads,
 									  real* DD,
 									  real* DD27,
 									  real* temp,
@@ -2552,7 +2550,7 @@ extern "C" void QADVeloIncompDev27(   unsigned int numberOfThreads,
       getLastCudaError("QADVeloIncomp27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void QADPressIncompDev7( unsigned int numberOfThreads,
+void QADPressIncompDev7( unsigned int numberOfThreads,
 									  real* DD,
 									  real* DD7,
 									  real* temp,
@@ -2601,7 +2599,7 @@ extern "C" void QADPressIncompDev7( unsigned int numberOfThreads,
       getLastCudaError("QADPressIncomp7 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void QADPressIncompDev27(  unsigned int numberOfThreads,
+void QADPressIncompDev27(  unsigned int numberOfThreads,
 									  real* DD,
 									  real* DD27,
 									  real* temp,
@@ -2650,7 +2648,7 @@ extern "C" void QADPressIncompDev27(  unsigned int numberOfThreads,
       getLastCudaError("QADPressIncomp27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void QDev27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition)
+void QDev27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition)
 {
    dim3 grid = vf::cuda::getCudaGrid( parameterDevice->numberofthreads,  boundaryCondition->numberOfBCnodes);
    dim3 threads(parameterDevice->numberofthreads, 1, 1 );
@@ -2670,7 +2668,7 @@ extern "C" void QDev27(LBMSimulationParameter* parameterDevice, QforBoundaryCond
       getLastCudaError("QDevice27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void QDevComp27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition)
+void QDevComp27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition)
 {
    dim3 grid = vf::cuda::getCudaGrid( parameterDevice->numberofthreads,  boundaryCondition->numberOfBCnodes);
    dim3 threads(parameterDevice->numberofthreads, 1, 1 );
@@ -2689,7 +2687,7 @@ extern "C" void QDevComp27(LBMSimulationParameter* parameterDevice, QforBoundary
       getLastCudaError("QDeviceComp27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void QDevCompThinWalls27(unsigned int numberOfThreads,
+void QDevCompThinWalls27(unsigned int numberOfThreads,
 									real* DD,
 									int* k_Q,
 									real* QQ,
@@ -2745,7 +2743,7 @@ extern "C" void QDevCompThinWalls27(unsigned int numberOfThreads,
 
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void QDev3rdMomentsComp27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition)
+void QDev3rdMomentsComp27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition)
 {
    dim3 grid = vf::cuda::getCudaGrid( parameterDevice->numberofthreads,  boundaryCondition->numberOfBCnodes);
    dim3 threads(parameterDevice->numberofthreads, 1, 1);
@@ -2764,7 +2762,7 @@ extern "C" void QDev3rdMomentsComp27(LBMSimulationParameter* parameterDevice, Qf
    getLastCudaError("QDevice3rdMomentsComp27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void QDevIncompHighNu27( unsigned int numberOfThreads,
+void QDevIncompHighNu27( unsigned int numberOfThreads,
 									real* DD,
 									int* k_Q,
 									real* QQ,
@@ -2805,7 +2803,7 @@ extern "C" void QDevIncompHighNu27( unsigned int numberOfThreads,
       getLastCudaError("QDeviceIncompHighNu27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void QDevCompHighNu27(   unsigned int numberOfThreads,
+void QDevCompHighNu27(   unsigned int numberOfThreads,
 									real* DD,
 									int* k_Q,
 									real* QQ,
@@ -2846,7 +2844,7 @@ extern "C" void QDevCompHighNu27(   unsigned int numberOfThreads,
       getLastCudaError("QDevice27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void QVelDevicePlainBB27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition)
+void QVelDevicePlainBB27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition)
 {
    dim3 grid = vf::cuda::getCudaGrid( parameterDevice->numberofthreads,  boundaryCondition->numberOfBCnodes);
    dim3 threads(parameterDevice->numberofthreads, 1, 1 );
@@ -2867,7 +2865,7 @@ extern "C" void QVelDevicePlainBB27(LBMSimulationParameter* parameterDevice, Qfo
    getLastCudaError("QVelDevicePlainBB27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void QVelDeviceCouette27(unsigned int numberOfThreads,
+void QVelDeviceCouette27(unsigned int numberOfThreads,
 									real* vx,
 									real* vy,
 									real* vz,
@@ -2913,7 +2911,7 @@ extern "C" void QVelDeviceCouette27(unsigned int numberOfThreads,
       getLastCudaError("QVelDevicePlainBB27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void QVelDevice1h27(   unsigned int numberOfThreads,
+void QVelDevice1h27(   unsigned int numberOfThreads,
 								  int nx,
 								  int ny,
 								  real* vx,
@@ -2973,7 +2971,7 @@ extern "C" void QVelDevice1h27(   unsigned int numberOfThreads,
       getLastCudaError("QVelDevice27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void QVelDev27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition)
+void QVelDev27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition)
 {
    dim3 grid = vf::cuda::getCudaGrid( parameterDevice->numberofthreads,  boundaryCondition->numberOfBCnodes);
    dim3 threads(parameterDevice->numberofthreads, 1, 1 );
@@ -2997,7 +2995,7 @@ extern "C" void QVelDev27(LBMSimulationParameter* parameterDevice, QforBoundaryC
       getLastCudaError("QVelDevice27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void QVelDevCompPlusSlip27(unsigned int numberOfThreads,
+void QVelDevCompPlusSlip27(unsigned int numberOfThreads,
 									  real* vx,
 									  real* vy,
 									  real* vz,
@@ -3044,7 +3042,7 @@ extern "C" void QVelDevCompPlusSlip27(unsigned int numberOfThreads,
       getLastCudaError("QVelDeviceCompPlusSlip27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void QVelDevComp27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition)
+void QVelDevComp27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition)
 {
    dim3 grid = vf::cuda::getCudaGrid(parameterDevice->numberofthreads,  boundaryCondition->numberOfBCnodes);
    dim3 threads(parameterDevice->numberofthreads, 1, 1 );
@@ -3066,7 +3064,7 @@ extern "C" void QVelDevComp27(LBMSimulationParameter* parameterDevice, QforBound
    getLastCudaError("QVelDeviceComp27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void QVelDevCompThinWalls27(unsigned int numberOfThreads,
+void QVelDevCompThinWalls27(unsigned int numberOfThreads,
 							           real* vx,
 							           real* vy,
 							           real* vz,
@@ -3128,7 +3126,7 @@ extern "C" void QVelDevCompThinWalls27(unsigned int numberOfThreads,
    getLastCudaError("QThinWallsPartTwo27 execution failed");
 }
 
-extern "C" void QVelDevCompZeroPress27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition)
+void QVelDevCompZeroPress27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition)
 {
    dim3 grid = vf::cuda::getCudaGrid( parameterDevice->numberofthreads,  boundaryCondition->numberOfBCnodes);
    dim3 threads(parameterDevice->numberofthreads, 1, 1 );
@@ -3150,7 +3148,7 @@ extern "C" void QVelDevCompZeroPress27(LBMSimulationParameter* parameterDevice,
    getLastCudaError("QVelDeviceCompZeroPress27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void QVelDevIncompHighNu27(unsigned int numberOfThreads,
+void QVelDevIncompHighNu27(unsigned int numberOfThreads,
 									  real* vx,
 									  real* vy,
 									  real* vz,
@@ -3197,7 +3195,7 @@ extern "C" void QVelDevIncompHighNu27(unsigned int numberOfThreads,
       getLastCudaError("QVelDeviceIncompHighNu27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void QVelDevCompHighNu27(  unsigned int numberOfThreads,
+void QVelDevCompHighNu27(  unsigned int numberOfThreads,
 									  real* vx,
 									  real* vy,
 									  real* vz,
@@ -3244,7 +3242,7 @@ extern "C" void QVelDevCompHighNu27(  unsigned int numberOfThreads,
       getLastCudaError("QVelDeviceComp27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void QVeloDevEQ27(unsigned int numberOfThreads,
+void QVeloDevEQ27(unsigned int numberOfThreads,
 							 real* VeloX,
 							 real* VeloY,
 							 real* VeloZ,
@@ -3288,7 +3286,7 @@ extern "C" void QVeloDevEQ27(unsigned int numberOfThreads,
       getLastCudaError("QVeloDeviceEQ27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void QVeloStreetDevEQ27(
+void QVeloStreetDevEQ27(
 	uint  numberOfThreads,
 	real* veloXfraction,
 	real* veloYfraction,
@@ -3334,7 +3332,7 @@ extern "C" void QVeloStreetDevEQ27(
 	getLastCudaError("QVeloStreetDeviceEQ27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void QSlipDev27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition)
+void QSlipDev27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition)
 {
    dim3 grid = vf::cuda::getCudaGrid( parameterDevice->numberofthreads, boundaryCondition->numberOfBCnodes);
    dim3 threads(parameterDevice->numberofthreads, 1, 1 );
@@ -3353,7 +3351,7 @@ extern "C" void QSlipDev27(LBMSimulationParameter* parameterDevice, QforBoundary
    getLastCudaError("QSlipDevice27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void QSlipDevCompTurbulentViscosity27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition)
+void QSlipDevCompTurbulentViscosity27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition)
 {
    dim3 grid = vf::cuda::getCudaGrid( parameterDevice->numberofthreads, boundaryCondition->numberOfBCnodes);
    dim3 threads(parameterDevice->numberofthreads, 1, 1 );
@@ -3373,7 +3371,7 @@ extern "C" void QSlipDevCompTurbulentViscosity27(LBMSimulationParameter* paramet
    getLastCudaError("QSlipDeviceComp27TurbViscosity execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void QSlipDevComp27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition)
+void QSlipDevComp27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition)
 {
    dim3 grid = vf::cuda::getCudaGrid( parameterDevice->numberofthreads, boundaryCondition->numberOfBCnodes);
    dim3 threads(parameterDevice->numberofthreads, 1, 1 );
@@ -3392,7 +3390,7 @@ extern "C" void QSlipDevComp27(LBMSimulationParameter* parameterDevice, QforBoun
    getLastCudaError("QSlipDeviceComp27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void QSlipGeomDevComp27(unsigned int numberOfThreads,
+void QSlipGeomDevComp27(unsigned int numberOfThreads,
 								   real* DD,
 								   int* k_Q,
 								   real* QQ,
@@ -3438,7 +3436,7 @@ extern "C" void QSlipGeomDevComp27(unsigned int numberOfThreads,
       getLastCudaError("QSlipGeomDeviceComp27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void QSlipNormDevComp27(unsigned int numberOfThreads,
+void QSlipNormDevComp27(unsigned int numberOfThreads,
 								   real* DD,
 								   int* k_Q,
 								   real* QQ,
@@ -3484,7 +3482,7 @@ extern "C" void QSlipNormDevComp27(unsigned int numberOfThreads,
       getLastCudaError("QSlipGeomDeviceComp27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void QStressDevComp27(Parameter *para,  QforBoundaryConditions* boundaryCondition, const int level)
+void QStressDevComp27(Parameter *para,  QforBoundaryConditions* boundaryCondition, const int level)
 {
    dim3 grid = vf::cuda::getCudaGrid(  para->getParD(level)->numberofthreads, boundaryCondition->numberOfBCnodes);
    dim3 threads(para->getParD(level)->numberofthreads, 1, 1 );
@@ -3525,7 +3523,7 @@ extern "C" void QStressDevComp27(Parameter *para,  QforBoundaryConditions* bound
 }
 
 //////////////////////////////////////////////////////////////////////////
-extern "C" void BBStressDev27(Parameter *para,  QforBoundaryConditions* boundaryCondition, const int level)
+void BBStressDev27(Parameter *para,  QforBoundaryConditions* boundaryCondition, const int level)
 {
    dim3 grid = vf::cuda::getCudaGrid( para->getParD(level)->numberofthreads, boundaryCondition->numberOfBCnodes);
    dim3 threads(para->getParD(level)->numberofthreads, 1, 1 );
@@ -3563,7 +3561,7 @@ extern "C" void BBStressDev27(Parameter *para,  QforBoundaryConditions* boundary
       getLastCudaError("BBStressDevice27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void QPressDev27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition)
+void QPressDev27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition)
 {
    dim3 grid = vf::cuda::getCudaGrid( parameterDevice->numberofthreads,  boundaryCondition->numberOfBCnodes);
    dim3 threads(parameterDevice->numberofthreads, 1, 1 );
@@ -3583,7 +3581,7 @@ extern "C" void QPressDev27(LBMSimulationParameter* parameterDevice, QforBoundar
    getLastCudaError("QPressDevice27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void QPressDevAntiBB27(  unsigned int numberOfThreads,
+void QPressDevAntiBB27(  unsigned int numberOfThreads,
                                     real* rhoBC,
 									real* vx,
 									real* vy,
@@ -3631,7 +3629,7 @@ extern "C" void QPressDevAntiBB27(  unsigned int numberOfThreads,
     getLastCudaError("QPressDeviceAntiBB27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void QPressDevFixBackflow27( unsigned int numberOfThreads,
+void QPressDevFixBackflow27( unsigned int numberOfThreads,
                                         real* rhoBC,
                                         real* DD,
                                         int* k_Q,
@@ -3671,7 +3669,7 @@ extern "C" void QPressDevFixBackflow27( unsigned int numberOfThreads,
       getLastCudaError("QPressDeviceFixBackflow27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void QPressDevDirDepBot27(  unsigned int numberOfThreads,
+void QPressDevDirDepBot27(  unsigned int numberOfThreads,
                                        real* rhoBC,
                                        real* DD,
                                        int* k_Q,
@@ -3711,7 +3709,7 @@ extern "C" void QPressDevDirDepBot27(  unsigned int numberOfThreads,
       getLastCudaError("QPressDeviceDirDepBot27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void QPressNoRhoDev27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition)
+void QPressNoRhoDev27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition)
 {
    dim3 grid = vf::cuda::getCudaGrid( parameterDevice->numberofthreads,  boundaryCondition->numberOfBCnodes);
    dim3 threads(parameterDevice->numberofthreads, 1, 1 );
@@ -3731,7 +3729,7 @@ extern "C" void QPressNoRhoDev27(LBMSimulationParameter* parameterDevice, QforBo
    getLastCudaError("QPressNoRhoDevice27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void QInflowScaleByPressDev27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition)
+void QInflowScaleByPressDev27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition)
 {
    dim3 grid = vf::cuda::getCudaGrid( parameterDevice->numberofthreads,  boundaryCondition->numberOfBCnodes);
    dim3 threads(parameterDevice->numberofthreads, 1, 1 );
@@ -3751,7 +3749,7 @@ extern "C" void QInflowScaleByPressDev27(LBMSimulationParameter* parameterDevice
    getLastCudaError("QInflowScaleByPressDevice27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void QPressDevOld27(  unsigned int numberOfThreads,
+void QPressDevOld27(  unsigned int numberOfThreads,
                                      real* rhoBC,
                                      real* DD,
                                      int* k_Q,
@@ -3793,7 +3791,7 @@ extern "C" void QPressDevOld27(  unsigned int numberOfThreads,
       getLastCudaError("QPressDeviceOld27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void QPressDevIncompNEQ27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition)
+void QPressDevIncompNEQ27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition)
 {
    dim3 grid = vf::cuda::getCudaGrid( parameterDevice->numberofthreads,  boundaryCondition->numberOfBCnodes);
    dim3 threads(parameterDevice->numberofthreads, 1, 1 );
@@ -3813,7 +3811,7 @@ extern "C" void QPressDevIncompNEQ27(LBMSimulationParameter* parameterDevice, Qf
    getLastCudaError("QPressDeviceIncompNEQ27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void QPressDevNEQ27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition)
+void QPressDevNEQ27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition)
 {
    dim3 grid = vf::cuda::getCudaGrid( parameterDevice->numberofthreads,  boundaryCondition->numberOfBCnodes);
    dim3 threads(parameterDevice->numberofthreads, 1, 1 );
@@ -3833,7 +3831,7 @@ extern "C" void QPressDevNEQ27(LBMSimulationParameter* parameterDevice, QforBoun
    getLastCudaError("QPressDevNEQ27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void QPressDevEQZ27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition)
+void QPressDevEQZ27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition)
 {
    dim3 grid = vf::cuda::getCudaGrid( parameterDevice->numberofthreads,  boundaryCondition->numberOfBCnodes);
    dim3 threads(parameterDevice->numberofthreads, 1, 1 );
@@ -3854,7 +3852,7 @@ extern "C" void QPressDevEQZ27(LBMSimulationParameter* parameterDevice, QforBoun
       getLastCudaError("QPressDeviceEQZ27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void QPressDevZero27(unsigned int numberOfThreads,
+void QPressDevZero27(unsigned int numberOfThreads,
                                 real* DD,
                                 int* k_Q,
                                 unsigned int numberOfBCnodes,
@@ -3890,7 +3888,7 @@ extern "C" void QPressDevZero27(unsigned int numberOfThreads,
       getLastCudaError("QPressDeviceOld27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void QPressDevFake27(     unsigned int numberOfThreads,
+void QPressDevFake27(     unsigned int numberOfThreads,
                                      real* rhoBC,
                                      real* DD,
                                      int* k_Q,
@@ -3932,7 +3930,7 @@ extern "C" void QPressDevFake27(     unsigned int numberOfThreads,
       getLastCudaError("QPressDeviceFake27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void BBDev27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition)
+void BBDev27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition)
 {
    dim3 grid = vf::cuda::getCudaGrid( parameterDevice->numberofthreads,  boundaryCondition->numberOfBCnodes);
    dim3 threads(parameterDevice->numberofthreads, 1, 1 );
@@ -3950,7 +3948,7 @@ extern "C" void BBDev27(LBMSimulationParameter* parameterDevice, QforBoundaryCon
    getLastCudaError("BBDevice27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void QPressDev27_IntBB(  unsigned int numberOfThreads,
+void QPressDev27_IntBB(  unsigned int numberOfThreads,
 									real* rho,
 									real* DD,
 									int* k_Q,
@@ -3993,7 +3991,7 @@ extern "C" void QPressDev27_IntBB(  unsigned int numberOfThreads,
 }
 // TODO: https://git.rz.tu-bs.de/irmb/VirtualFluids_dev/-/issues/29
 //////////////////////////////////////////////////////////////////////////
-extern "C" void PressSchlaffer27(unsigned int numberOfThreads,
+void PressSchlaffer27(unsigned int numberOfThreads,
                                  real* rhoBC,
                                  real* DD,
                                  real* vx0,
@@ -4044,7 +4042,7 @@ extern "C" void PressSchlaffer27(unsigned int numberOfThreads,
 }
 // TODO: https://git.rz.tu-bs.de/irmb/VirtualFluids_dev/-/issues/29
 //////////////////////////////////////////////////////////////////////////
-extern "C" void VelSchlaffer27(  unsigned int numberOfThreads,
+void VelSchlaffer27(  unsigned int numberOfThreads,
                                  int t,
                                  real* DD,
                                  real* vz0,
@@ -4090,7 +4088,7 @@ extern "C" void VelSchlaffer27(  unsigned int numberOfThreads,
       getLastCudaError("VelSchlaff27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void PropVelo(   unsigned int numberOfThreads,
+void PropVelo(   unsigned int numberOfThreads,
                             unsigned int* neighborX,
                             unsigned int* neighborY,
                             unsigned int* neighborZ,
@@ -4136,7 +4134,7 @@ extern "C" void PropVelo(   unsigned int numberOfThreads,
       getLastCudaError("PropellerBC execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void ScaleCF27( real* DC,
+void ScaleCF27( real* DC,
                         real* DF,
                         unsigned int* neighborCX,
                         unsigned int* neighborCY,
@@ -4198,7 +4196,7 @@ extern "C" void ScaleCF27( real* DC,
       getLastCudaError("scaleCF27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void ScaleCFEff27(real* DC,
+void ScaleCFEff27(real* DC,
                              real* DF,
                              unsigned int* neighborCX,
                              unsigned int* neighborCY,
@@ -4262,7 +4260,7 @@ extern "C" void ScaleCFEff27(real* DC,
       getLastCudaError("scaleCFEff27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void ScaleCFLast27(real* DC,
+void ScaleCFLast27(real* DC,
                               real* DF,
                               unsigned int* neighborCX,
                               unsigned int* neighborCY,
@@ -4326,7 +4324,7 @@ extern "C" void ScaleCFLast27(real* DC,
       getLastCudaError("scaleCFLast27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void ScaleCFpress27(  real* DC,
+void ScaleCFpress27(  real* DC,
                                  real* DF,
                                  unsigned int* neighborCX,
                                  unsigned int* neighborCY,
@@ -4390,7 +4388,7 @@ extern "C" void ScaleCFpress27(  real* DC,
       getLastCudaError("scaleCFpress27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void ScaleCF_Fix_27(  real* DC,
+void ScaleCF_Fix_27(  real* DC,
                                  real* DF,
                                  unsigned int* neighborCX,
                                  unsigned int* neighborCY,
@@ -4454,7 +4452,7 @@ extern "C" void ScaleCF_Fix_27(  real* DC,
       getLastCudaError("scaleCF_Fix_27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void ScaleCF_Fix_comp_27( real* DC,
+void ScaleCF_Fix_comp_27( real* DC,
 									 real* DF,
 									 unsigned int* neighborCX,
 									 unsigned int* neighborCY,
@@ -4518,7 +4516,7 @@ extern "C" void ScaleCF_Fix_comp_27( real* DC,
       getLastCudaError("scaleCF_Fix_27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void ScaleCF_0817_comp_27(real* DC,
+void ScaleCF_0817_comp_27(real* DC,
 									 real* DF,
 									 unsigned int* neighborCX,
 									 unsigned int* neighborCY,
@@ -4583,7 +4581,7 @@ extern "C" void ScaleCF_0817_comp_27(real* DC,
       getLastCudaError("scaleCF_0817_27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void ScaleCF_comp_D3Q27F3_2018(real* DC,
+void ScaleCF_comp_D3Q27F3_2018(real* DC,
 										  real* DF,
 										  real* G6,
 										  unsigned int* neighborCX,
@@ -4649,7 +4647,7 @@ extern "C" void ScaleCF_comp_D3Q27F3_2018(real* DC,
       getLastCudaError("scaleCF_comp_D3Q27F3_2018 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void ScaleCF_comp_D3Q27F3(real* DC,
+void ScaleCF_comp_D3Q27F3(real* DC,
 									 real* DF,
 									 real* G6,
 									 unsigned int* neighborCX,
@@ -4716,7 +4714,7 @@ extern "C" void ScaleCF_comp_D3Q27F3(real* DC,
       getLastCudaError("scaleCF_comp_D3Q27F3 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void ScaleCF_staggered_time_comp_27(  real* DC,
+void ScaleCF_staggered_time_comp_27(  real* DC,
 												 real* DF,
 												 unsigned int* neighborCX,
 												 unsigned int* neighborCY,
@@ -4780,7 +4778,7 @@ extern "C" void ScaleCF_staggered_time_comp_27(  real* DC,
       getLastCudaError("scaleCF_Fix_27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void ScaleCF_RhoSq_comp_27(   real* DC,
+void ScaleCF_RhoSq_comp_27(   real* DC,
 										 real* DF,
 										 unsigned int* neighborCX,
 										 unsigned int* neighborCY,
@@ -4845,7 +4843,7 @@ extern "C" void ScaleCF_RhoSq_comp_27(   real* DC,
       getLastCudaError("scaleCF_RhoSq_27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void ScaleCF_RhoSq_3rdMom_comp_27(real* DC,
+void ScaleCF_RhoSq_3rdMom_comp_27(real* DC,
 											 real* DF,
 											 unsigned int* neighborCX,
 											 unsigned int* neighborCY,
@@ -4910,7 +4908,7 @@ extern "C" void ScaleCF_RhoSq_3rdMom_comp_27(real* DC,
       getLastCudaError("scaleCF_RhoSq_3rdMom_comp_27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void ScaleCF_AA2016_comp_27(real* DC,
+void ScaleCF_AA2016_comp_27(real* DC,
 									   real* DF,
 									   unsigned int* neighborCX,
 									   unsigned int* neighborCY,
@@ -4975,7 +4973,7 @@ extern "C" void ScaleCF_AA2016_comp_27(real* DC,
       getLastCudaError("scaleCF_AA2016_comp_27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void ScaleCF_NSPress_27(  real* DC,
+void ScaleCF_NSPress_27(  real* DC,
 									 real* DF,
 									 unsigned int* neighborCX,
 									 unsigned int* neighborCY,
@@ -5039,7 +5037,7 @@ extern "C" void ScaleCF_NSPress_27(  real* DC,
       getLastCudaError("scaleCF_Fix_27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void ScaleCFThSMG7(   real* DC,
+void ScaleCFThSMG7(   real* DC,
                                  real* DF,
                                  real* DD7C,
                                  real* DD7F,
@@ -5097,7 +5095,7 @@ extern "C" void ScaleCFThSMG7(   real* DC,
       getLastCudaError("scaleCFThSMG7 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void ScaleCFThS7(  real* DC,
+void ScaleCFThS7(  real* DC,
                               real* DF,
                               real* DD7C,
                               real* DD7F,
@@ -5153,7 +5151,7 @@ extern "C" void ScaleCFThS7(  real* DC,
       getLastCudaError("scaleCFThS7 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void ScaleCFThS27( real* DC,
+void ScaleCFThS27( real* DC,
                               real* DF,
                               real* DD27C,
                               real* DD27F,
@@ -5211,7 +5209,7 @@ extern "C" void ScaleCFThS27( real* DC,
       getLastCudaError("scaleCFThS27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void ScaleFC27( real* DC,
+void ScaleFC27( real* DC,
                            real* DF,
                            unsigned int* neighborCX,
                            unsigned int* neighborCY,
@@ -5273,7 +5271,7 @@ extern "C" void ScaleFC27( real* DC,
       getLastCudaError("scaleFC27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void ScaleFCEff27(real* DC,
+void ScaleFCEff27(real* DC,
                              real* DF,
                              unsigned int* neighborCX,
                              unsigned int* neighborCY,
@@ -5337,7 +5335,7 @@ extern "C" void ScaleFCEff27(real* DC,
       getLastCudaError("scaleFCEff27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void ScaleFCLast27(real* DC,
+void ScaleFCLast27(real* DC,
                               real* DF,
                               unsigned int* neighborCX,
                               unsigned int* neighborCY,
@@ -5401,7 +5399,7 @@ extern "C" void ScaleFCLast27(real* DC,
       getLastCudaError("Kernel execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void ScaleFCpress27(real* DC,
+void ScaleFCpress27(real* DC,
                               real* DF,
                               unsigned int* neighborCX,
                               unsigned int* neighborCY,
@@ -5465,7 +5463,7 @@ extern "C" void ScaleFCpress27(real* DC,
       getLastCudaError("scaleFCpress27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void ScaleFC_Fix_27(real* DC,
+void ScaleFC_Fix_27(real* DC,
                               real* DF,
                               unsigned int* neighborCX,
                               unsigned int* neighborCY,
@@ -5529,7 +5527,7 @@ extern "C" void ScaleFC_Fix_27(real* DC,
       getLastCudaError("scaleFC_Fix_27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void ScaleFC_Fix_comp_27(  real* DC,
+void ScaleFC_Fix_comp_27(  real* DC,
 									  real* DF,
 									  unsigned int* neighborCX,
 									  unsigned int* neighborCY,
@@ -5593,7 +5591,7 @@ extern "C" void ScaleFC_Fix_comp_27(  real* DC,
       getLastCudaError("scaleFC_Fix_27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void ScaleFC_0817_comp_27( real* DC,
+void ScaleFC_0817_comp_27( real* DC,
 									  real* DF,
 									  unsigned int* neighborCX,
 									  unsigned int* neighborCY,
@@ -5658,7 +5656,7 @@ extern "C" void ScaleFC_0817_comp_27( real* DC,
       getLastCudaError("scaleFC_0817_27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void ScaleFC_comp_D3Q27F3_2018( real* DC,
+void ScaleFC_comp_D3Q27F3_2018( real* DC,
 										   real* DF,
 										   real* G6,
 										   unsigned int* neighborCX,
@@ -5724,7 +5722,7 @@ extern "C" void ScaleFC_comp_D3Q27F3_2018( real* DC,
       getLastCudaError("scaleFC_comp_D3Q27F3_2018 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void ScaleFC_comp_D3Q27F3( real* DC,
+void ScaleFC_comp_D3Q27F3( real* DC,
 									  real* DF,
 									  real* G6,
 									  unsigned int* neighborCX,
@@ -5791,7 +5789,7 @@ extern "C" void ScaleFC_comp_D3Q27F3( real* DC,
       getLastCudaError("scaleFC_0817_27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void ScaleFC_staggered_time_comp_27(   real* DC,
+void ScaleFC_staggered_time_comp_27(   real* DC,
 												  real* DF,
 												  unsigned int* neighborCX,
 												  unsigned int* neighborCY,
@@ -5855,7 +5853,7 @@ extern "C" void ScaleFC_staggered_time_comp_27(   real* DC,
       getLastCudaError("scaleFC_Fix_27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void ScaleFC_RhoSq_comp_27(real* DC,
+void ScaleFC_RhoSq_comp_27(real* DC,
 									  real* DF,
 									  unsigned int* neighborCX,
 									  unsigned int* neighborCY,
@@ -5922,7 +5920,7 @@ extern "C" void ScaleFC_RhoSq_comp_27(real* DC,
 }
 
 //////////////////////////////////////////////////////////////////////////
-extern "C" void ScaleFC_RhoSq_3rdMom_comp_27( real* DC,
+void ScaleFC_RhoSq_3rdMom_comp_27( real* DC,
 											  real* DF,
 											  unsigned int* neighborCX,
 											  unsigned int* neighborCY,
@@ -5987,7 +5985,7 @@ extern "C" void ScaleFC_RhoSq_3rdMom_comp_27( real* DC,
       getLastCudaError("scaleFC_RhoSq_3rdMom_comp_27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void ScaleFC_AA2016_comp_27( real* DC,
+void ScaleFC_AA2016_comp_27( real* DC,
 										real* DF,
 										unsigned int* neighborCX,
 										unsigned int* neighborCY,
@@ -6052,7 +6050,7 @@ extern "C" void ScaleFC_AA2016_comp_27( real* DC,
       getLastCudaError("scaleFC_AA2016_comp_27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void ScaleFC_NSPress_27(real* DC,
+void ScaleFC_NSPress_27(real* DC,
 								  real* DF,
 								  unsigned int* neighborCX,
 								  unsigned int* neighborCY,
@@ -6116,7 +6114,7 @@ extern "C" void ScaleFC_NSPress_27(real* DC,
       getLastCudaError("scaleFC_Fix_27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void ScaleFCThSMG7(real* DC,
+void ScaleFCThSMG7(real* DC,
                               real* DF,
                               real* DD7C,
                               real* DD7F,
@@ -6174,7 +6172,7 @@ extern "C" void ScaleFCThSMG7(real* DC,
       getLastCudaError("scaleFCThSMG7 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void ScaleFCThS7(  real* DC,
+void ScaleFCThS7(  real* DC,
                               real* DF,
                               real* DD7C,
                               real* DD7F,
@@ -6230,7 +6228,7 @@ extern "C" void ScaleFCThS7(  real* DC,
       getLastCudaError("scaleFCThS7 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void ScaleFCThS27( real* DC,
+void ScaleFCThS27( real* DC,
                               real* DF,
                               real* DD27C,
                               real* DD27F,
@@ -6288,7 +6286,7 @@ extern "C" void ScaleFCThS27( real* DC,
       getLastCudaError("scaleFCThS27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void DragLiftPostD27(real* DD,
+void DragLiftPostD27(real* DD,
 								int* k_Q,
 								real* QQ,
 								int numberOfBCnodes,
@@ -6332,7 +6330,7 @@ extern "C" void DragLiftPostD27(real* DD,
 	getLastCudaError("DragLift27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void DragLiftPreD27( real* DD,
+void DragLiftPreD27( real* DD,
 								int* k_Q,
 								real* QQ,
 								int numberOfBCnodes,
@@ -6376,7 +6374,7 @@ extern "C" void DragLiftPreD27( real* DD,
 	getLastCudaError("DragLift27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void CalcCPtop27(real* DD,
+void CalcCPtop27(real* DD,
 							int* cpIndex,
 							int nonCp,
 							double *cpPress,
@@ -6414,7 +6412,7 @@ extern "C" void CalcCPtop27(real* DD,
 	getLastCudaError("CalcCP27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void CalcCPbottom27( real* DD,
+void CalcCPbottom27( real* DD,
 								int* cpIndex,
 								int nonCp,
 								double *cpPress,
@@ -6452,7 +6450,7 @@ extern "C" void CalcCPbottom27( real* DD,
 	getLastCudaError("CalcCP27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void GetSendFsPreDev27(real* DD,
+void GetSendFsPreDev27(real* DD,
 								  real* bufferFs,
 								  int* sendIndex,
 								  int buffmax,
@@ -6491,7 +6489,7 @@ extern "C" void GetSendFsPreDev27(real* DD,
 	getLastCudaError("getSendFsPre27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void GetSendFsPostDev27(real* DD,
+void GetSendFsPostDev27(real* DD,
 								   real* bufferFs,
 								   int* sendIndex,
 								   int buffmax,
@@ -6530,7 +6528,7 @@ extern "C" void GetSendFsPostDev27(real* DD,
 	getLastCudaError("getSendFsPost27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void SetRecvFsPreDev27(real* DD,
+void SetRecvFsPreDev27(real* DD,
 								  real* bufferFs,
 								  int* recvIndex,
 								  int buffmax,
@@ -6569,7 +6567,7 @@ extern "C" void SetRecvFsPreDev27(real* DD,
 	getLastCudaError("setRecvFsPre27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void SetRecvFsPostDev27(real* DD,
+void SetRecvFsPostDev27(real* DD,
 								   real* bufferFs,
 								   int* recvIndex,
 								   int buffmax,
@@ -6608,7 +6606,7 @@ extern "C" void SetRecvFsPostDev27(real* DD,
 	getLastCudaError("setRecvFsPost27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void getSendGsDevF3(
+void getSendGsDevF3(
 	real* G6,
 	real* bufferGs,
 	int* sendIndex,
@@ -6648,7 +6646,7 @@ extern "C" void getSendGsDevF3(
 	getLastCudaError("getSendGsF3 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void setRecvGsDevF3(
+void setRecvGsDevF3(
 	real* G6,
 	real* bufferGs,
 	int* recvIndex,
@@ -6688,7 +6686,7 @@ extern "C" void setRecvGsDevF3(
 	getLastCudaError("setRecvGsF3 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void WallFuncDev27(unsigned int numberOfThreads,
+void WallFuncDev27(unsigned int numberOfThreads,
 							  real* vx,
 							  real* vy,
 							  real* vz,
@@ -6735,7 +6733,7 @@ extern "C" void WallFuncDev27(unsigned int numberOfThreads,
       getLastCudaError("WallFunction27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void SetOutputWallVelocitySP27(unsigned int numberOfThreads,
+void SetOutputWallVelocitySP27(unsigned int numberOfThreads,
 										  real* vxD,
 										  real* vyD,
 										  real* vzD,
@@ -6789,7 +6787,7 @@ extern "C" void SetOutputWallVelocitySP27(unsigned int numberOfThreads,
       getLastCudaError("LBSetOutputWallVelocitySP27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void GetVelotoForce27(unsigned int numberOfThreads,
+void GetVelotoForce27(unsigned int numberOfThreads,
 								 real* DD,
 								 int* bcIndex,
 								 int nonAtBC,
@@ -6831,7 +6829,7 @@ extern "C" void GetVelotoForce27(unsigned int numberOfThreads,
       getLastCudaError("GetVeloforForcing27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void InitParticlesDevice(real* coordX,
+void InitParticlesDevice(real* coordX,
 									real* coordY,
 									real* coordZ,
 									real* coordParticleXlocal,
@@ -6897,7 +6895,7 @@ extern "C" void InitParticlesDevice(real* coordX,
       getLastCudaError("InitParticles execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void MoveParticlesDevice(real* coordX,
+void MoveParticlesDevice(real* coordX,
 									real* coordY,
 									real* coordZ,
 									real* coordParticleXlocal,
@@ -6971,7 +6969,7 @@ extern "C" void MoveParticlesDevice(real* coordX,
       getLastCudaError("MoveParticles execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void initRandomDevice(curandState* state,
+void initRandomDevice(curandState* state,
 								 unsigned int size_Mat,
 								 unsigned int numberOfThreads)
 {
@@ -6994,7 +6992,7 @@ extern "C" void initRandomDevice(curandState* state,
    getLastCudaError("initRandom execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void generateRandomValuesDevice( curandState* state,
+void generateRandomValuesDevice( curandState* state,
 											unsigned int size_Mat,
 											real* randArray,
 											unsigned int numberOfThreads)
@@ -7018,7 +7016,7 @@ extern "C" void generateRandomValuesDevice( curandState* state,
    getLastCudaError("generateRandomValues execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void CalcTurbulenceIntensityDevice(
+void CalcTurbulenceIntensityDevice(
    real* vxx,
    real* vyy,
    real* vzz,
diff --git a/src/gpu/VirtualFluids_GPU/GPU/NoSlipBCs27.cu b/src/gpu/VirtualFluids_GPU/GPU/NoSlipBCs27.cu
index c48c9d9c902e94b1f2b5f4d464b75a1812b4ca79..31e83f56a6dd123d2d303374d6cf93adac9a32cd 100644
--- a/src/gpu/VirtualFluids_GPU/GPU/NoSlipBCs27.cu
+++ b/src/gpu/VirtualFluids_GPU/GPU/NoSlipBCs27.cu
@@ -15,7 +15,7 @@ using namespace vf::lbm::constant;
 using namespace vf::lbm::dir;
 
 //////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void QDevice3rdMomentsComp27(
+__global__ void QDevice3rdMomentsComp27(
 													 real* distributions, 
 													 int* subgridDistanceIndices, 
 													 real* subgridDistances,
@@ -559,80 +559,77 @@ extern "C" __global__ void QDevice3rdMomentsComp27(
 
 
 //////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void QDeviceIncompHighNu27(int inx,
-												 int iny,
-												 real* DD, 
+__global__ void QDeviceIncompHighNu27(real* DD, 
 												 int* k_Q, 
 												 real* QQ,
-												 unsigned int  numberOfBCnodes,
-												 int numberOfNodes, 
+												 unsigned int numberOfBCnodes,
 												 real om1, 
 												 unsigned int* neighborX,
 												 unsigned int* neighborY,
 												 unsigned int* neighborZ,
-												 unsigned int size_Mat, 
+												 unsigned int numberOfLBnodes, 
 												 bool isEvenTimestep)
 {
    Distributions27 D;
    if (isEvenTimestep==true)
    {
-      D.f[E   ] = &DD[E   *size_Mat];
-      D.f[W   ] = &DD[W   *size_Mat];
-      D.f[N   ] = &DD[N   *size_Mat];
-      D.f[S   ] = &DD[S   *size_Mat];
-      D.f[T   ] = &DD[T   *size_Mat];
-      D.f[B   ] = &DD[B   *size_Mat];
-      D.f[NE  ] = &DD[NE  *size_Mat];
-      D.f[SW  ] = &DD[SW  *size_Mat];
-      D.f[SE  ] = &DD[SE  *size_Mat];
-      D.f[NW  ] = &DD[NW  *size_Mat];
-      D.f[TE  ] = &DD[TE  *size_Mat];
-      D.f[BW  ] = &DD[BW  *size_Mat];
-      D.f[BE  ] = &DD[BE  *size_Mat];
-      D.f[TW  ] = &DD[TW  *size_Mat];
-      D.f[TN  ] = &DD[TN  *size_Mat];
-      D.f[BS  ] = &DD[BS  *size_Mat];
-      D.f[BN  ] = &DD[BN  *size_Mat];
-      D.f[TS  ] = &DD[TS  *size_Mat];
-      D.f[REST] = &DD[REST*size_Mat];
-      D.f[TNE ] = &DD[TNE *size_Mat];
-      D.f[TSW ] = &DD[TSW *size_Mat];
-      D.f[TSE ] = &DD[TSE *size_Mat];
-      D.f[TNW ] = &DD[TNW *size_Mat];
-      D.f[BNE ] = &DD[BNE *size_Mat];
-      D.f[BSW ] = &DD[BSW *size_Mat];
-      D.f[BSE ] = &DD[BSE *size_Mat];
-      D.f[BNW ] = &DD[BNW *size_Mat];
+      D.f[E   ] = &DD[E   *numberOfLBnodes];
+      D.f[W   ] = &DD[W   *numberOfLBnodes];
+      D.f[N   ] = &DD[N   *numberOfLBnodes];
+      D.f[S   ] = &DD[S   *numberOfLBnodes];
+      D.f[T   ] = &DD[T   *numberOfLBnodes];
+      D.f[B   ] = &DD[B   *numberOfLBnodes];
+      D.f[NE  ] = &DD[NE  *numberOfLBnodes];
+      D.f[SW  ] = &DD[SW  *numberOfLBnodes];
+      D.f[SE  ] = &DD[SE  *numberOfLBnodes];
+      D.f[NW  ] = &DD[NW  *numberOfLBnodes];
+      D.f[TE  ] = &DD[TE  *numberOfLBnodes];
+      D.f[BW  ] = &DD[BW  *numberOfLBnodes];
+      D.f[BE  ] = &DD[BE  *numberOfLBnodes];
+      D.f[TW  ] = &DD[TW  *numberOfLBnodes];
+      D.f[TN  ] = &DD[TN  *numberOfLBnodes];
+      D.f[BS  ] = &DD[BS  *numberOfLBnodes];
+      D.f[BN  ] = &DD[BN  *numberOfLBnodes];
+      D.f[TS  ] = &DD[TS  *numberOfLBnodes];
+      D.f[REST] = &DD[REST*numberOfLBnodes];
+      D.f[TNE ] = &DD[TNE *numberOfLBnodes];
+      D.f[TSW ] = &DD[TSW *numberOfLBnodes];
+      D.f[TSE ] = &DD[TSE *numberOfLBnodes];
+      D.f[TNW ] = &DD[TNW *numberOfLBnodes];
+      D.f[BNE ] = &DD[BNE *numberOfLBnodes];
+      D.f[BSW ] = &DD[BSW *numberOfLBnodes];
+      D.f[BSE ] = &DD[BSE *numberOfLBnodes];
+      D.f[BNW ] = &DD[BNW *numberOfLBnodes];
    } 
    else
    {
-      D.f[W   ] = &DD[E   *size_Mat];
-      D.f[E   ] = &DD[W   *size_Mat];
-      D.f[S   ] = &DD[N   *size_Mat];
-      D.f[N   ] = &DD[S   *size_Mat];
-      D.f[B   ] = &DD[T   *size_Mat];
-      D.f[T   ] = &DD[B   *size_Mat];
-      D.f[SW  ] = &DD[NE  *size_Mat];
-      D.f[NE  ] = &DD[SW  *size_Mat];
-      D.f[NW  ] = &DD[SE  *size_Mat];
-      D.f[SE  ] = &DD[NW  *size_Mat];
-      D.f[BW  ] = &DD[TE  *size_Mat];
-      D.f[TE  ] = &DD[BW  *size_Mat];
-      D.f[TW  ] = &DD[BE  *size_Mat];
-      D.f[BE  ] = &DD[TW  *size_Mat];
-      D.f[BS  ] = &DD[TN  *size_Mat];
-      D.f[TN  ] = &DD[BS  *size_Mat];
-      D.f[TS  ] = &DD[BN  *size_Mat];
-      D.f[BN  ] = &DD[TS  *size_Mat];
-      D.f[REST] = &DD[REST*size_Mat];
-      D.f[TNE ] = &DD[BSW *size_Mat];
-      D.f[TSW ] = &DD[BNE *size_Mat];
-      D.f[TSE ] = &DD[BNW *size_Mat];
-      D.f[TNW ] = &DD[BSE *size_Mat];
-      D.f[BNE ] = &DD[TSW *size_Mat];
-      D.f[BSW ] = &DD[TNE *size_Mat];
-      D.f[BSE ] = &DD[TNW *size_Mat];
-      D.f[BNW ] = &DD[TSE *size_Mat];
+      D.f[W   ] = &DD[E   *numberOfLBnodes];
+      D.f[E   ] = &DD[W   *numberOfLBnodes];
+      D.f[S   ] = &DD[N   *numberOfLBnodes];
+      D.f[N   ] = &DD[S   *numberOfLBnodes];
+      D.f[B   ] = &DD[T   *numberOfLBnodes];
+      D.f[T   ] = &DD[B   *numberOfLBnodes];
+      D.f[SW  ] = &DD[NE  *numberOfLBnodes];
+      D.f[NE  ] = &DD[SW  *numberOfLBnodes];
+      D.f[NW  ] = &DD[SE  *numberOfLBnodes];
+      D.f[SE  ] = &DD[NW  *numberOfLBnodes];
+      D.f[BW  ] = &DD[TE  *numberOfLBnodes];
+      D.f[TE  ] = &DD[BW  *numberOfLBnodes];
+      D.f[TW  ] = &DD[BE  *numberOfLBnodes];
+      D.f[BE  ] = &DD[TW  *numberOfLBnodes];
+      D.f[BS  ] = &DD[TN  *numberOfLBnodes];
+      D.f[TN  ] = &DD[BS  *numberOfLBnodes];
+      D.f[TS  ] = &DD[BN  *numberOfLBnodes];
+      D.f[BN  ] = &DD[TS  *numberOfLBnodes];
+      D.f[REST] = &DD[REST*numberOfLBnodes];
+      D.f[TNE ] = &DD[BSW *numberOfLBnodes];
+      D.f[TSW ] = &DD[BNE *numberOfLBnodes];
+      D.f[TSE ] = &DD[BNW *numberOfLBnodes];
+      D.f[TNW ] = &DD[BSE *numberOfLBnodes];
+      D.f[BNE ] = &DD[TSW *numberOfLBnodes];
+      D.f[BSW ] = &DD[TNE *numberOfLBnodes];
+      D.f[BSE ] = &DD[TNW *numberOfLBnodes];
+      D.f[BNW ] = &DD[TSE *numberOfLBnodes];
    }
    ////////////////////////////////////////////////////////////////////////////////
    const unsigned  x = threadIdx.x;  // Globaler x-Index 
@@ -645,7 +642,7 @@ extern "C" __global__ void QDeviceIncompHighNu27(int inx,
    const unsigned k = nx*(ny*z + y) + x;
    //////////////////////////////////////////////////////////////////////////
 
-   if(k<numberOfNodes)
+   if(k<numberOfBCnodes)
    {
       ////////////////////////////////////////////////////////////////////////////////
       real *q_dirE,   *q_dirW,   *q_dirN,   *q_dirS,   *q_dirT,   *q_dirB, 
@@ -763,63 +760,63 @@ extern "C" __global__ void QDeviceIncompHighNu27(int inx,
       //////////////////////////////////////////////////////////////////////////
       if (isEvenTimestep==false)
       {
-         D.f[E   ] = &DD[E   *size_Mat];
-         D.f[W   ] = &DD[W   *size_Mat];
-         D.f[N   ] = &DD[N   *size_Mat];
-         D.f[S   ] = &DD[S   *size_Mat];
-         D.f[T   ] = &DD[T   *size_Mat];
-         D.f[B   ] = &DD[B   *size_Mat];
-         D.f[NE  ] = &DD[NE  *size_Mat];
-         D.f[SW  ] = &DD[SW  *size_Mat];
-         D.f[SE  ] = &DD[SE  *size_Mat];
-         D.f[NW  ] = &DD[NW  *size_Mat];
-         D.f[TE  ] = &DD[TE  *size_Mat];
-         D.f[BW  ] = &DD[BW  *size_Mat];
-         D.f[BE  ] = &DD[BE  *size_Mat];
-         D.f[TW  ] = &DD[TW  *size_Mat];
-         D.f[TN  ] = &DD[TN  *size_Mat];
-         D.f[BS  ] = &DD[BS  *size_Mat];
-         D.f[BN  ] = &DD[BN  *size_Mat];
-         D.f[TS  ] = &DD[TS  *size_Mat];
-         D.f[REST] = &DD[REST*size_Mat];
-         D.f[TNE ] = &DD[TNE *size_Mat];
-         D.f[TSW ] = &DD[TSW *size_Mat];
-         D.f[TSE ] = &DD[TSE *size_Mat];
-         D.f[TNW ] = &DD[TNW *size_Mat];
-         D.f[BNE ] = &DD[BNE *size_Mat];
-         D.f[BSW ] = &DD[BSW *size_Mat];
-         D.f[BSE ] = &DD[BSE *size_Mat];
-         D.f[BNW ] = &DD[BNW *size_Mat];
+         D.f[E   ] = &DD[E   *numberOfLBnodes];
+         D.f[W   ] = &DD[W   *numberOfLBnodes];
+         D.f[N   ] = &DD[N   *numberOfLBnodes];
+         D.f[S   ] = &DD[S   *numberOfLBnodes];
+         D.f[T   ] = &DD[T   *numberOfLBnodes];
+         D.f[B   ] = &DD[B   *numberOfLBnodes];
+         D.f[NE  ] = &DD[NE  *numberOfLBnodes];
+         D.f[SW  ] = &DD[SW  *numberOfLBnodes];
+         D.f[SE  ] = &DD[SE  *numberOfLBnodes];
+         D.f[NW  ] = &DD[NW  *numberOfLBnodes];
+         D.f[TE  ] = &DD[TE  *numberOfLBnodes];
+         D.f[BW  ] = &DD[BW  *numberOfLBnodes];
+         D.f[BE  ] = &DD[BE  *numberOfLBnodes];
+         D.f[TW  ] = &DD[TW  *numberOfLBnodes];
+         D.f[TN  ] = &DD[TN  *numberOfLBnodes];
+         D.f[BS  ] = &DD[BS  *numberOfLBnodes];
+         D.f[BN  ] = &DD[BN  *numberOfLBnodes];
+         D.f[TS  ] = &DD[TS  *numberOfLBnodes];
+         D.f[REST] = &DD[REST*numberOfLBnodes];
+         D.f[TNE ] = &DD[TNE *numberOfLBnodes];
+         D.f[TSW ] = &DD[TSW *numberOfLBnodes];
+         D.f[TSE ] = &DD[TSE *numberOfLBnodes];
+         D.f[TNW ] = &DD[TNW *numberOfLBnodes];
+         D.f[BNE ] = &DD[BNE *numberOfLBnodes];
+         D.f[BSW ] = &DD[BSW *numberOfLBnodes];
+         D.f[BSE ] = &DD[BSE *numberOfLBnodes];
+         D.f[BNW ] = &DD[BNW *numberOfLBnodes];
       } 
       else
       {
-         D.f[W   ] = &DD[E   *size_Mat];
-         D.f[E   ] = &DD[W   *size_Mat];
-         D.f[S   ] = &DD[N   *size_Mat];
-         D.f[N   ] = &DD[S   *size_Mat];
-         D.f[B   ] = &DD[T   *size_Mat];
-         D.f[T   ] = &DD[B   *size_Mat];
-         D.f[SW  ] = &DD[NE  *size_Mat];
-         D.f[NE  ] = &DD[SW  *size_Mat];
-         D.f[NW  ] = &DD[SE  *size_Mat];
-         D.f[SE  ] = &DD[NW  *size_Mat];
-         D.f[BW  ] = &DD[TE  *size_Mat];
-         D.f[TE  ] = &DD[BW  *size_Mat];
-         D.f[TW  ] = &DD[BE  *size_Mat];
-         D.f[BE  ] = &DD[TW  *size_Mat];
-         D.f[BS  ] = &DD[TN  *size_Mat];
-         D.f[TN  ] = &DD[BS  *size_Mat];
-         D.f[TS  ] = &DD[BN  *size_Mat];
-         D.f[BN  ] = &DD[TS  *size_Mat];
-         D.f[REST] = &DD[REST*size_Mat];
-         D.f[TNE ] = &DD[BSW *size_Mat];
-         D.f[TSW ] = &DD[BNE *size_Mat];
-         D.f[TSE ] = &DD[BNW *size_Mat];
-         D.f[TNW ] = &DD[BSE *size_Mat];
-         D.f[BNE ] = &DD[TSW *size_Mat];
-         D.f[BSW ] = &DD[TNE *size_Mat];
-         D.f[BSE ] = &DD[TNW *size_Mat];
-         D.f[BNW ] = &DD[TSE *size_Mat];
+         D.f[W   ] = &DD[E   *numberOfLBnodes];
+         D.f[E   ] = &DD[W   *numberOfLBnodes];
+         D.f[S   ] = &DD[N   *numberOfLBnodes];
+         D.f[N   ] = &DD[S   *numberOfLBnodes];
+         D.f[B   ] = &DD[T   *numberOfLBnodes];
+         D.f[T   ] = &DD[B   *numberOfLBnodes];
+         D.f[SW  ] = &DD[NE  *numberOfLBnodes];
+         D.f[NE  ] = &DD[SW  *numberOfLBnodes];
+         D.f[NW  ] = &DD[SE  *numberOfLBnodes];
+         D.f[SE  ] = &DD[NW  *numberOfLBnodes];
+         D.f[BW  ] = &DD[TE  *numberOfLBnodes];
+         D.f[TE  ] = &DD[BW  *numberOfLBnodes];
+         D.f[TW  ] = &DD[BE  *numberOfLBnodes];
+         D.f[BE  ] = &DD[TW  *numberOfLBnodes];
+         D.f[BS  ] = &DD[TN  *numberOfLBnodes];
+         D.f[TN  ] = &DD[BS  *numberOfLBnodes];
+         D.f[TS  ] = &DD[BN  *numberOfLBnodes];
+         D.f[BN  ] = &DD[TS  *numberOfLBnodes];
+         D.f[REST] = &DD[REST*numberOfLBnodes];
+         D.f[TNE ] = &DD[BSW *numberOfLBnodes];
+         D.f[TSW ] = &DD[BNE *numberOfLBnodes];
+         D.f[TSE ] = &DD[BNW *numberOfLBnodes];
+         D.f[TNW ] = &DD[BSE *numberOfLBnodes];
+         D.f[BNE ] = &DD[TSW *numberOfLBnodes];
+         D.f[BSW ] = &DD[TNE *numberOfLBnodes];
+         D.f[BSE ] = &DD[TNW *numberOfLBnodes];
+         D.f[BNW ] = &DD[TSE *numberOfLBnodes];
       }
       ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
       //Test
@@ -1057,7 +1054,7 @@ extern "C" __global__ void QDeviceIncompHighNu27(int inx,
 
 
 //////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void QDeviceCompHighNu27(
+__global__ void QDeviceCompHighNu27(
 												 real* DD, 
 												 int* k_Q, 
 												 real* QQ,
@@ -1631,7 +1628,7 @@ extern "C" __global__ void QDeviceCompHighNu27(
 
 
 //////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void QDeviceComp27(
+__global__ void QDeviceComp27(
 										 real* distributions, 
 										 int* subgridDistanceIndices, 
 										 real* subgridDistances,
@@ -2015,7 +2012,7 @@ extern "C" __global__ void QDeviceComp27(
 
 
 //////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void QDevice27(real* distributions, 
+__global__ void QDevice27(real* distributions, 
                                      int* subgridDistanceIndices, 
                                      real* subgridDistances,
                                      unsigned int numberOfBCnodes, 
@@ -2402,7 +2399,7 @@ extern "C" __global__ void QDevice27(real* distributions,
 
 
 //////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void BBDevice27(real* distributions, 
+__global__ void BBDevice27(real* distributions, 
                                      int* subgridDistanceIndices, 
                                      real* subgridDistances,
                                      unsigned int numberOfBCnodes, 
diff --git a/src/gpu/VirtualFluids_GPU/GPU/Particles.cu b/src/gpu/VirtualFluids_GPU/GPU/Particles.cu
index a0192efab597856ecf365ebc69c9035ba06d1b44..361857afb4fec490751f0e4bf40c19a309e61b8b 100644
--- a/src/gpu/VirtualFluids_GPU/GPU/Particles.cu
+++ b/src/gpu/VirtualFluids_GPU/GPU/Particles.cu
@@ -7,7 +7,7 @@ using namespace vf::lbm::constant;
 using namespace vf::lbm::dir;
 
 //////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void InitParticles( real* coordX,
+__global__ void InitParticles( real* coordX,
 										  real* coordY,
 										  real* coordZ, 
 										  real* coordParticleXlocal,
@@ -158,7 +158,7 @@ extern "C" __global__ void InitParticles( real* coordX,
 
 
 //////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void MoveParticles( real* coordX,
+__global__ void MoveParticles( real* coordX,
 										  real* coordY,
 										  real* coordZ, 
 										  real* coordParticleXlocal,
@@ -1030,7 +1030,7 @@ extern "C" __global__ void MoveParticles( real* coordX,
 
 
 //////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void MoveParticlesWithoutBCs(   real* coordX,
+__global__ void MoveParticlesWithoutBCs(   real* coordX,
 													  real* coordY,
 													  real* coordZ, 
 													  real* coordParticleXlocal,
@@ -1896,7 +1896,7 @@ extern "C" __global__ void MoveParticlesWithoutBCs(   real* coordX,
 
 
 //////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void ParticleNoSlipDeviceComp27(real* coordX,
+__global__ void ParticleNoSlipDeviceComp27(real* coordX,
 													  real* coordY,
 													  real* coordZ, 
 													  real* coordParticleXlocal,
diff --git a/src/gpu/VirtualFluids_GPU/GPU/PressBCs27.cu b/src/gpu/VirtualFluids_GPU/GPU/PressBCs27.cu
index 2c4959eed943b4b72493a0c36630d8bc7ec83586..5ba78bffac957068699f78059f7c489a0fda5afd 100644
--- a/src/gpu/VirtualFluids_GPU/GPU/PressBCs27.cu
+++ b/src/gpu/VirtualFluids_GPU/GPU/PressBCs27.cu
@@ -8,7 +8,7 @@ using namespace vf::lbm::constant;
 using namespace vf::lbm::dir;
 
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void QInflowScaleByPressDevice27(  real* rhoBC,
+__global__ void QInflowScaleByPressDevice27(  real* rhoBC,
 														 real* DD, 
 														 int* k_Q, 
 														 int* k_N, 
@@ -465,7 +465,7 @@ extern "C" __global__ void QInflowScaleByPressDevice27(  real* rhoBC,
 
 
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void QPressDeviceIncompNEQ27( real* rhoBC,
+__global__ void QPressDeviceIncompNEQ27( real* rhoBC,
 													real* DD, 
 													int* k_Q, 
 													int* k_N, 
@@ -804,7 +804,7 @@ extern "C" __global__ void QPressDeviceIncompNEQ27( real* rhoBC,
 
 
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void QPressDeviceNEQ27(real* rhoBC,
+__global__ void QPressDeviceNEQ27(real* rhoBC,
                                              real* distribution, 
                                              int* bcNodeIndices,
                                              int* bcNeighborIndices,
@@ -1107,7 +1107,7 @@ extern "C" __global__ void QPressDeviceNEQ27(real* rhoBC,
 
 
 ////////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void LB_BC_Press_East27( int nx, 
+__global__ void LB_BC_Press_East27( int nx, 
                                                int ny, 
                                                int tz, 
                                                unsigned int* bcMatD, 
@@ -1416,7 +1416,7 @@ extern "C" __global__ void LB_BC_Press_East27( int nx,
 
 
 //////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void QPressDevice27(real* rhoBC,
+__global__ void QPressDevice27(real* rhoBC,
                                            real* DD, 
                                            int* k_Q, 
                                            real* QQ,
@@ -1899,7 +1899,7 @@ extern "C" __global__ void QPressDevice27(real* rhoBC,
 
 
 //////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void QPressDeviceAntiBB27(   real* rhoBC,
+__global__ void QPressDeviceAntiBB27(   real* rhoBC,
 												   real* vx,
 												   real* vy,
 												   real* vz,
@@ -2364,7 +2364,7 @@ extern "C" __global__ void QPressDeviceAntiBB27(   real* rhoBC,
 
 
 //////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void QPressDeviceFixBackflow27( real* rhoBC,
+__global__ void QPressDeviceFixBackflow27( real* rhoBC,
                                                       real* DD, 
                                                       int* k_Q, 
                                                       int numberOfBCnodes, 
@@ -2555,7 +2555,7 @@ extern "C" __global__ void QPressDeviceFixBackflow27( real* rhoBC,
 
 
 //////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void QPressDeviceDirDepBot27(  real* rhoBC,
+__global__ void QPressDeviceDirDepBot27(  real* rhoBC,
                                                      real* DD, 
                                                      int* k_Q, 
                                                      int numberOfBCnodes, 
@@ -2797,7 +2797,7 @@ extern "C" __global__ void QPressDeviceDirDepBot27(  real* rhoBC,
 
 
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void QPressNoRhoDevice27(  real* rhoBC,
+__global__ void QPressNoRhoDevice27(  real* rhoBC,
 												 real* DD, 
 												 int* k_Q, 
 												 int* k_N, 
@@ -3314,7 +3314,7 @@ extern "C" __global__ void QPressNoRhoDevice27(  real* rhoBC,
 
 
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void QPressDeviceOld27(real* rhoBC,
+__global__ void QPressDeviceOld27(real* rhoBC,
                                              real* DD, 
                                              int* k_Q, 
                                              int* k_N, 
@@ -3573,7 +3573,7 @@ extern "C" __global__ void QPressDeviceOld27(real* rhoBC,
 
 
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void QPressDeviceEQZ27(real* rhoBC,
+__global__ void QPressDeviceEQZ27(real* rhoBC,
                                              real* DD, 
                                              int* k_Q, 
                                              int* k_N,
@@ -4354,9 +4354,9 @@ extern "C" __global__ void QPressDeviceEQZ27(real* rhoBC,
 
 
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void QPressDeviceZero27(	 real* DD, 
+__global__ void QPressDeviceZero27(	 real* DD, 
 												 int* k_Q, 
-												 int numberOfBCnodes, 
+												 unsigned int numberOfBCnodes, 
 												 unsigned int* neighborX,
 												 unsigned int* neighborY,
 												 unsigned int* neighborZ,
@@ -4541,7 +4541,7 @@ extern "C" __global__ void QPressDeviceZero27(	 real* DD,
 
 
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void QPressDeviceFake27(	 real* rhoBC,
+__global__ void QPressDeviceFake27(	 real* rhoBC,
 												 real* DD, 
 												 int* k_Q, 
 												 int* k_N, 
@@ -4815,7 +4815,7 @@ extern "C" __global__ void QPressDeviceFake27(	 real* rhoBC,
 
 
 //////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void QPressDevice27_IntBB(real* rho,
+__global__ void QPressDevice27_IntBB(real* rho,
 												real* DD, 
 												int* k_Q, 
 												real* QQ,
diff --git a/src/gpu/VirtualFluids_GPU/GPU/Random.cu b/src/gpu/VirtualFluids_GPU/GPU/Random.cu
index 04e3d3aba2f168043fecb3b83498671cd8474de7..a605fbd42d2977e0f0b6e15aeb50f8c78654f31c 100644
--- a/src/gpu/VirtualFluids_GPU/GPU/Random.cu
+++ b/src/gpu/VirtualFluids_GPU/GPU/Random.cu
@@ -12,7 +12,7 @@ using namespace vf::lbm::dir;
 
 
 //////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void initRandom(curandState* state)
+__global__ void initRandom(curandState* state)
 {
    ////////////////////////////////////////////////////////////////////////////////
    const unsigned  x = threadIdx.x;  // Globaler x-Index 
@@ -36,7 +36,7 @@ extern "C" __global__ void initRandom(curandState* state)
 
 
 //////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void generateRandomValues(curandState* state, real* randArray)
+__global__ void generateRandomValues(curandState* state, real* randArray)
 {
    ////////////////////////////////////////////////////////////////////////////////
    const unsigned  x = threadIdx.x;  // Globaler x-Index 
diff --git a/src/gpu/VirtualFluids_GPU/GPU/ScaleCF27.cu b/src/gpu/VirtualFluids_GPU/GPU/ScaleCF27.cu
index 1347f8dd0e5031d55a4417b3ba59493d0f6db311..d9a18b8875a8801d1df9fbf6baef04c7cbf4ab0f 100644
--- a/src/gpu/VirtualFluids_GPU/GPU/ScaleCF27.cu
+++ b/src/gpu/VirtualFluids_GPU/GPU/ScaleCF27.cu
@@ -14,7 +14,7 @@ using namespace vf::lbm::constant;
 using namespace vf::lbm::dir;
 
 //////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void scaleCF_0817_comp_27( real* DC, 
+__global__ void scaleCF_0817_comp_27( real* DC, 
 												 real* DF, 
 												 unsigned int* neighborCX,
 												 unsigned int* neighborCY,
@@ -4083,7 +4083,7 @@ extern "C" __global__ void scaleCF_0817_comp_27( real* DC,
 
 
 //////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void scaleCF_AA2016_comp_27(real* DC, 
+__global__ void scaleCF_AA2016_comp_27(real* DC, 
 												  real* DF, 
 												  unsigned int* neighborCX,
 												  unsigned int* neighborCY,
@@ -10966,7 +10966,7 @@ extern "C" __global__ void scaleCF_AA2016_comp_27(real* DC,
 
 
 //////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void scaleCF_RhoSq_3rdMom_comp_27(real* DC, 
+__global__ void scaleCF_RhoSq_3rdMom_comp_27(real* DC, 
 														real* DF, 
 														unsigned int* neighborCX,
 														unsigned int* neighborCY,
@@ -17841,7 +17841,7 @@ extern "C" __global__ void scaleCF_RhoSq_3rdMom_comp_27(real* DC,
 
 
 //////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void scaleCF_RhoSq_comp_27(real* DC, 
+__global__ void scaleCF_RhoSq_comp_27(real* DC, 
 												 real* DF, 
 												 unsigned int* neighborCX,
 												 unsigned int* neighborCY,
@@ -22089,7 +22089,7 @@ extern "C" __global__ void scaleCF_RhoSq_comp_27(real* DC,
 
 
 //////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void scaleCF_staggered_time_comp_27(   real* DC, 
+__global__ void scaleCF_staggered_time_comp_27(   real* DC, 
 															 real* DF, 
 															 unsigned int* neighborCX,
 															 unsigned int* neighborCY,
@@ -26325,7 +26325,7 @@ extern "C" __global__ void scaleCF_staggered_time_comp_27(   real* DC,
 
 
 //////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void scaleCF_Fix_comp_27(  real* DC, 
+__global__ void scaleCF_Fix_comp_27(  real* DC, 
 												 real* DF, 
 												 unsigned int* neighborCX,
 												 unsigned int* neighborCY,
@@ -31092,7 +31092,7 @@ extern "C" __global__ void scaleCF_Fix_comp_27(  real* DC,
 
 
 //////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void scaleCF_NSPress_27(   real* DC, 
+__global__ void scaleCF_NSPress_27(   real* DC, 
 												 real* DF, 
 												 unsigned int* neighborCX,
 												 unsigned int* neighborCY,
@@ -35036,7 +35036,7 @@ extern "C" __global__ void scaleCF_NSPress_27(   real* DC,
 
 
 //////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void scaleCF_Fix_27(   real* DC, 
+__global__ void scaleCF_Fix_27(   real* DC, 
                                              real* DF, 
                                              unsigned int* neighborCX,
                                              unsigned int* neighborCY,
@@ -39294,7 +39294,7 @@ extern "C" __global__ void scaleCF_Fix_27(   real* DC,
 
 
 //////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void scaleCFpress27(   real* DC, 
+__global__ void scaleCFpress27(   real* DC, 
                                              real* DF, 
                                              unsigned int* neighborCX,
                                              unsigned int* neighborCY,
@@ -40968,7 +40968,7 @@ extern "C" __global__ void scaleCFpress27(   real* DC,
 
 
 //////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void scaleCFLast27( real* DC, 
+__global__ void scaleCFLast27( real* DC, 
                                           real* DF, 
                                           unsigned int* neighborCX,
                                           unsigned int* neighborCY,
@@ -43203,7 +43203,7 @@ extern "C" __global__ void scaleCFLast27( real* DC,
 
 
 //////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void scaleCFThSMG7(    real* DC, 
+__global__ void scaleCFThSMG7(    real* DC, 
                                              real* DF,
                                              real* DD7C, 
                                              real* DD7F, 
@@ -44430,7 +44430,7 @@ extern "C" __global__ void scaleCFThSMG7(    real* DC,
 
 
 //////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void scaleCFThS7(   real* DC, 
+__global__ void scaleCFThS7(   real* DC, 
                                           real* DF,
                                           real* DD7C, 
                                           real* DD7F, 
@@ -45553,7 +45553,7 @@ extern "C" __global__ void scaleCFThS7(   real* DC,
 
 
 //////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void scaleCFThS27(     real* DC, 
+__global__ void scaleCFThS27(     real* DC, 
                                              real* DF,
                                              real* DD27C, 
                                              real* DD27F, 
@@ -47243,7 +47243,7 @@ extern "C" __global__ void scaleCFThS27(     real* DC,
 
 
 //////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void scaleCFEff27(real* DC, 
+__global__ void scaleCFEff27(real* DC, 
                                         real* DF, 
                                         unsigned int* neighborCX,
                                         unsigned int* neighborCY,
@@ -48953,7 +48953,7 @@ extern "C" __global__ void scaleCFEff27(real* DC,
 
 
 //////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void scaleCF27(real* DC, 
+__global__ void scaleCF27(real* DC, 
                                      real* DF, 
                                      unsigned int* neighborCX,
                                      unsigned int* neighborCY,
diff --git a/src/gpu/VirtualFluids_GPU/GPU/ScaleCF_F3_27.cu b/src/gpu/VirtualFluids_GPU/GPU/ScaleCF_F3_27.cu
index 14e36971c87df7d83ab899cb141979ae9707bad9..e27503c5d2a0a362f04980cc21d0afcc23959467 100644
--- a/src/gpu/VirtualFluids_GPU/GPU/ScaleCF_F3_27.cu
+++ b/src/gpu/VirtualFluids_GPU/GPU/ScaleCF_F3_27.cu
@@ -14,7 +14,7 @@ using namespace vf::lbm::constant;
 using namespace vf::lbm::dir;
 
 //////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void scaleCF_comp_D3Q27F3_2018(real* DC,
+__global__ void scaleCF_comp_D3Q27F3_2018(real* DC,
 													 real* DF,
 													 real* G6,
 													 unsigned int* neighborCX,
@@ -4361,7 +4361,7 @@ extern "C" __global__ void scaleCF_comp_D3Q27F3_2018(real* DC,
 
 
 //////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void scaleCF_comp_D3Q27F3( real* DC,
+__global__ void scaleCF_comp_D3Q27F3( real* DC,
 												 real* DF,
 												 real* G6,
 												 unsigned int* neighborCX,
diff --git a/src/gpu/VirtualFluids_GPU/GPU/ScaleFC27.cu b/src/gpu/VirtualFluids_GPU/GPU/ScaleFC27.cu
index 773a5829d53be7966008758e1271c4754348287c..fc2e1926b37d170f25958ec60328ac665c6b1467 100644
--- a/src/gpu/VirtualFluids_GPU/GPU/ScaleFC27.cu
+++ b/src/gpu/VirtualFluids_GPU/GPU/ScaleFC27.cu
@@ -14,7 +14,7 @@ using namespace vf::lbm::constant;
 using namespace vf::lbm::dir;
 
 //////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void scaleFC_0817_comp_27( real* DC, 
+__global__ void scaleFC_0817_comp_27( real* DC, 
 												 real* DF, 
 												 unsigned int* neighborCX,
 												 unsigned int* neighborCY,
@@ -1210,7 +1210,7 @@ extern "C" __global__ void scaleFC_0817_comp_27( real* DC,
 
 
 //////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void scaleFC_AA2016_comp_27(real* DC, 
+__global__ void scaleFC_AA2016_comp_27(real* DC, 
 												  real* DF, 
 												  unsigned int* neighborCX,
 												  unsigned int* neighborCY,
@@ -5399,7 +5399,7 @@ extern "C" __global__ void scaleFC_AA2016_comp_27(real* DC,
 
 
 //////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void scaleFC_RhoSq_3rdMom_comp_27(real* DC, 
+__global__ void scaleFC_RhoSq_3rdMom_comp_27(real* DC, 
 														real* DF, 
 														unsigned int* neighborCX,
 														unsigned int* neighborCY,
@@ -11056,7 +11056,7 @@ __device__ void scaleFC_RhoSq_comp_27_Calculation(real *DC, real *DF, unsigned i
     }
 }
 
-extern "C" __global__ void scaleFC_RhoSq_comp_27(real* DC, 
+__global__ void scaleFC_RhoSq_comp_27(real* DC, 
 												 real* DF, 
 												 unsigned int* neighborCX,
 												 unsigned int* neighborCY,
@@ -11149,7 +11149,7 @@ extern "C" __global__ void scaleFC_RhoSq_comp_27(real* DC,
 
 
 //////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void scaleFC_staggered_time_comp_27(   real* DC, 
+__global__ void scaleFC_staggered_time_comp_27(   real* DC, 
 															 real* DF, 
 															 unsigned int* neighborCX,
 															 unsigned int* neighborCY,
@@ -13270,7 +13270,7 @@ extern "C" __global__ void scaleFC_staggered_time_comp_27(   real* DC,
 
 
 //////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void scaleFC_Fix_comp_27(  real* DC, 
+__global__ void scaleFC_Fix_comp_27(  real* DC, 
 												 real* DF, 
 												 unsigned int* neighborCX,
 												 unsigned int* neighborCY,
@@ -15130,7 +15130,7 @@ extern "C" __global__ void scaleFC_Fix_comp_27(  real* DC,
 
 
 //////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void scaleFC_NSPress_27(   real* DC, 
+__global__ void scaleFC_NSPress_27(   real* DC, 
 												 real* DF, 
 												 unsigned int* neighborCX,
 												 unsigned int* neighborCY,
@@ -16336,7 +16336,7 @@ extern "C" __global__ void scaleFC_NSPress_27(   real* DC,
 
 
 //////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void scaleFC_Fix_27(   real* DC, 
+__global__ void scaleFC_Fix_27(   real* DC, 
                                              real* DF, 
                                              unsigned int* neighborCX,
                                              unsigned int* neighborCY,
@@ -17696,7 +17696,7 @@ extern "C" __global__ void scaleFC_Fix_27(   real* DC,
 
 
 //////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void scaleFCpress27(real* DC, 
+__global__ void scaleFCpress27(real* DC, 
                                           real* DF, 
                                           unsigned int* neighborCX,
                                           unsigned int* neighborCY,
@@ -18621,7 +18621,7 @@ extern "C" __global__ void scaleFCpress27(real* DC,
 
 
 //////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void scaleFCLast27( real* DC, 
+__global__ void scaleFCLast27( real* DC, 
                                           real* DF, 
                                           unsigned int* neighborCX,
                                           unsigned int* neighborCY,
@@ -20017,7 +20017,7 @@ extern "C" __global__ void scaleFCLast27( real* DC,
 
 
 //////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void scaleFCThSMG7(    real* DC, 
+__global__ void scaleFCThSMG7(    real* DC, 
                                              real* DF, 
                                              real* DD7C, 
                                              real* DD7F, 
@@ -20890,7 +20890,7 @@ extern "C" __global__ void scaleFCThSMG7(    real* DC,
 
 
 //////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void scaleFCThS7(   real* DC, 
+__global__ void scaleFCThS7(   real* DC, 
                                           real* DF, 
                                           real* DD7C, 
                                           real* DD7F, 
@@ -21681,7 +21681,7 @@ extern "C" __global__ void scaleFCThS7(   real* DC,
 
 
 //////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void scaleFCThS27(     real* DC, 
+__global__ void scaleFCThS27(     real* DC, 
                                              real* DF, 
                                              real* DD27C, 
                                              real* DD27F, 
@@ -22804,7 +22804,7 @@ extern "C" __global__ void scaleFCThS27(     real* DC,
 
 
 //////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void scaleFCEff27(real* DC, 
+__global__ void scaleFCEff27(real* DC, 
                                         real* DF, 
                                         unsigned int* neighborCX,
                                         unsigned int* neighborCY,
@@ -23783,7 +23783,7 @@ extern "C" __global__ void scaleFCEff27(real* DC,
 
 
 //////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void scaleFC27(real* DC, 
+__global__ void scaleFC27(real* DC, 
                                      real* DF, 
                                      unsigned int* neighborCX,
                                      unsigned int* neighborCY,
diff --git a/src/gpu/VirtualFluids_GPU/GPU/ScaleFC_F3_27.cu b/src/gpu/VirtualFluids_GPU/GPU/ScaleFC_F3_27.cu
index 4e55569b1b296476af782a1176e8e4420d3e2022..68642b8b53011504f53eaabf2f67fdbce6d224c3 100644
--- a/src/gpu/VirtualFluids_GPU/GPU/ScaleFC_F3_27.cu
+++ b/src/gpu/VirtualFluids_GPU/GPU/ScaleFC_F3_27.cu
@@ -14,7 +14,7 @@ using namespace vf::lbm::constant;
 using namespace vf::lbm::dir;
 
 //////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void scaleFC_comp_D3Q27F3_2018(real* DC,
+__global__ void scaleFC_comp_D3Q27F3_2018(real* DC,
 													 real* DF,
 													 real* G6,
 													 unsigned int* neighborCX,
@@ -1261,7 +1261,7 @@ extern "C" __global__ void scaleFC_comp_D3Q27F3_2018(real* DC,
 
 
 //////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void scaleFC_comp_D3Q27F3( real* DC,
+__global__ void scaleFC_comp_D3Q27F3( real* DC,
 												 real* DF,
 												 real* G6,
 												 unsigned int* neighborCX,
diff --git a/src/gpu/VirtualFluids_GPU/GPU/SchlafferBCs27.cu b/src/gpu/VirtualFluids_GPU/GPU/SchlafferBCs27.cu
index d90cfab98cea27f6d3e5d18f772377326cbc5f6d..ba2c0c5040228bff65ea9494f0640e88cfc74021 100644
--- a/src/gpu/VirtualFluids_GPU/GPU/SchlafferBCs27.cu
+++ b/src/gpu/VirtualFluids_GPU/GPU/SchlafferBCs27.cu
@@ -8,7 +8,7 @@ using namespace vf::lbm::dir;
 
 // TODO: https://git.rz.tu-bs.de/irmb/VirtualFluids_dev/-/issues/29
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void PressSchlaff27(real* rhoBC,
+__global__ void PressSchlaff27(real* rhoBC,
                                           real* DD,
                                           real* vx0,
                                           real* vy0,
@@ -274,7 +274,7 @@ extern "C" __global__ void PressSchlaff27(real* rhoBC,
 
 // TODO: https://git.rz.tu-bs.de/irmb/VirtualFluids_dev/-/issues/29
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void VelSchlaff27(  int t,
+__global__ void VelSchlaff27(  int t,
                                           real* DD,
                                           real* vz0,
                                           real* deltaVz0,
diff --git a/src/gpu/VirtualFluids_GPU/GPU/SetForcing27.cu b/src/gpu/VirtualFluids_GPU/GPU/SetForcing27.cu
index 303cf6424607e0f3427ed3735cc0137d7a54028e..1d44157ef2bde4d778be96fa43113da3a829e697 100644
--- a/src/gpu/VirtualFluids_GPU/GPU/SetForcing27.cu
+++ b/src/gpu/VirtualFluids_GPU/GPU/SetForcing27.cu
@@ -7,7 +7,7 @@ using namespace vf::lbm::constant;
 using namespace vf::lbm::dir;
 
 ////////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void GetVeloforForcing27( real* DD, 
+__global__ void GetVeloforForcing27( real* DD, 
 												int* bcIndex, 
 												int nonAtBC, 
 												real* Vx,
diff --git a/src/gpu/VirtualFluids_GPU/GPU/SlipBCs27.cu b/src/gpu/VirtualFluids_GPU/GPU/SlipBCs27.cu
index b092f7ba9aa7bcc3eaf4bde33f5a1151f04a0bf7..717a41465ddedd5c170fddc273f7916b24f8cf49 100644
--- a/src/gpu/VirtualFluids_GPU/GPU/SlipBCs27.cu
+++ b/src/gpu/VirtualFluids_GPU/GPU/SlipBCs27.cu
@@ -8,7 +8,7 @@ using namespace vf::lbm::constant;
 using namespace vf::lbm::dir;
 
 //////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void QSlipDevice27(real* DD, 
+__global__ void QSlipDevice27(real* DD, 
                                          int* k_Q, 
                                          real* QQ,
                                          unsigned int numberOfBCnodes,
@@ -658,7 +658,7 @@ extern "C" __global__ void QSlipDevice27(real* DD,
 
 
 //////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void QSlipDeviceComp27(
+__global__ void QSlipDeviceComp27(
                                     real* distributions, 
                                     int* subgridDistanceIndices, 
                                     real* subgridDistances,
@@ -1169,7 +1169,7 @@ extern "C" __global__ void QSlipDeviceComp27(
 }
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 //////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void QSlipDeviceComp27TurbViscosity(real* DD, 
+__global__ void QSlipDeviceComp27TurbViscosity(real* DD, 
 											 int* k_Q, 
 											 real* QQ,
 											 unsigned int numberOfBCnodes,
@@ -1874,7 +1874,7 @@ extern "C" __global__ void QSlipDeviceComp27TurbViscosity(real* DD,
 
 
 //////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void QSlipGeomDeviceComp27(real* DD, 
+__global__ void QSlipGeomDeviceComp27(real* DD, 
 												 int* k_Q, 
 												 real* QQ,
 												 unsigned int  numberOfBCnodes,
@@ -2760,7 +2760,7 @@ extern "C" __global__ void QSlipGeomDeviceComp27(real* DD,
 
 
 //////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void QSlipNormDeviceComp27(real* DD, 
+__global__ void QSlipNormDeviceComp27(real* DD, 
 												 int* k_Q, 
 												 real* QQ,
 												 unsigned int  numberOfBCnodes,
diff --git a/src/gpu/VirtualFluids_GPU/GPU/StressBCs27.cu b/src/gpu/VirtualFluids_GPU/GPU/StressBCs27.cu
index 6a966e46f7a2e21bd825801bbe6be1df207303d2..aff80457eda8011c1110175db3662309361a8684 100644
--- a/src/gpu/VirtualFluids_GPU/GPU/StressBCs27.cu
+++ b/src/gpu/VirtualFluids_GPU/GPU/StressBCs27.cu
@@ -48,7 +48,7 @@ using namespace vf::lbm::constant;
 using namespace vf::lbm::dir;
 
 //////////////////////////////////////////////////////////////////////////////
-extern "C" __host__ __device__ __forceinline__ void iMEM(uint k, uint kN,
+__host__ __device__ __forceinline__ void iMEM(uint k, uint kN,
                                                          real* _wallNormalX, real* _wallNormalY, real* _wallNormalZ,
                                                          real* vx, real* vy, real* vz,
                                                          real* vx_el,      real* vy_el,      real* vz_el,      //!>mean (temporally filtered) velocities at exchange location
@@ -135,7 +135,7 @@ extern "C" __host__ __device__ __forceinline__ void iMEM(uint k, uint kN,
 }
 
 //////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void QStressDeviceComp27(real* DD,
+__global__ void QStressDeviceComp27(real* DD,
 											   int* k_Q,
                                     int* k_N,
 											   real* QQ,
@@ -913,7 +913,7 @@ extern "C" __global__ void QStressDeviceComp27(real* DD,
 }
 
 //////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void BBStressDevice27( real* DD,
+__global__ void BBStressDevice27( real* DD,
 											            int* k_Q,
                                              int* k_N,
                                              real* QQ,
diff --git a/src/gpu/VirtualFluids_GPU/GPU/ThinWallBCs27.cu b/src/gpu/VirtualFluids_GPU/GPU/ThinWallBCs27.cu
index 9df139b63ab4d7e636c8e976ee92bb8d38b595c6..df4eb817e453cf168f7df79abe110bfd39a14e52 100644
--- a/src/gpu/VirtualFluids_GPU/GPU/ThinWallBCs27.cu
+++ b/src/gpu/VirtualFluids_GPU/GPU/ThinWallBCs27.cu
@@ -15,14 +15,14 @@ using namespace vf::lbm::constant;
 using namespace vf::lbm::dir;
 
 /////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void QVelDeviceCompThinWallsPartOne27(
+__global__ void QVelDeviceCompThinWallsPartOne27(
 	real* vx,
 	real* vy,
 	real* vz,
 	real* DD, 
 	int* k_Q, 
 	real* QQ,
-	int numberOfBCnodes, 
+	uint numberOfBCnodes, 
 	real om1, 
 	uint* neighborX,
 	uint* neighborY,
@@ -447,7 +447,7 @@ extern "C" __global__ void QVelDeviceCompThinWallsPartOne27(
 
 
 //////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void QDeviceCompThinWallsPartOne27(
+__global__ void QDeviceCompThinWallsPartOne27(
 	real* DD,
 	int* k_Q,
 	real* QQ,
@@ -872,7 +872,7 @@ extern "C" __global__ void QDeviceCompThinWallsPartOne27(
 
 
 //////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void QThinWallsPartTwo27(
+__global__ void QThinWallsPartTwo27(
 	real* DD, 
 	int* k_Q, 
 	real* QQ,
diff --git a/src/gpu/VirtualFluids_GPU/GPU/TurbulenceIntensity.cu b/src/gpu/VirtualFluids_GPU/GPU/TurbulenceIntensity.cu
index 2c81a9588ef36d225bec67ee1c764f2e2fc7665e..f8cf8ab13c39d55477bf006cd27f7943dcb5b53a 100644
--- a/src/gpu/VirtualFluids_GPU/GPU/TurbulenceIntensity.cu
+++ b/src/gpu/VirtualFluids_GPU/GPU/TurbulenceIntensity.cu
@@ -19,7 +19,7 @@ using namespace vf::lbm::constant;
 using namespace vf::lbm::dir;
 
 //////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void CalcTurbulenceIntensity(
+__global__ void CalcTurbulenceIntensity(
    real* vxx,
    real* vyy,
    real* vzz,
diff --git a/src/gpu/VirtualFluids_GPU/GPU/TurbulentViscosity.cu b/src/gpu/VirtualFluids_GPU/GPU/TurbulentViscosity.cu
index 76d799747e846f9f0b17cd7bbd4734186b15989f..ed7a199e414709c6a3beff69374989fef2884dc2 100644
--- a/src/gpu/VirtualFluids_GPU/GPU/TurbulentViscosity.cu
+++ b/src/gpu/VirtualFluids_GPU/GPU/TurbulentViscosity.cu
@@ -8,7 +8,7 @@
 
 using namespace vf::lbm::constant;
 
-extern "C" __host__ __device__ __forceinline__ void calcDerivatives(const uint& k, uint& kM, uint& kP, uint* typeOfGridNode, real* vx, real* vy, real* vz, real& dvx, real& dvy, real& dvz)
+__host__ __device__ __forceinline__ void calcDerivatives(const uint& k, uint& kM, uint& kP, uint* typeOfGridNode, real* vx, real* vy, real* vz, real& dvx, real& dvy, real& dvz)
 {
     bool fluidP = (typeOfGridNode[kP] == GEO_FLUID);
     bool fluidM = (typeOfGridNode[kM] == GEO_FLUID);
@@ -19,7 +19,7 @@ extern "C" __host__ __device__ __forceinline__ void calcDerivatives(const uint&
     dvz = ((fluidP ? vz[kP] : vz[k])-(fluidM ? vz[kM] : vz[k]))*div;
 }
 
-extern "C" __global__ void calcAMD(real* vx,
+__global__ void calcAMD(real* vx,
                         real* vy,
                         real* vz,
                         real* turbulentViscosity,
@@ -72,7 +72,7 @@ extern "C" __global__ void calcAMD(real* vx,
     turbulentViscosity[k] = max(c0o1,-SGSConstant*enumerator)/denominator;
 }
 
-extern "C" void calcTurbulentViscosityAMD(Parameter* para, int level)
+void calcTurbulentViscosityAMD(Parameter* para, int level)
 {
     vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(para->getParH(level)->numberofthreads, para->getParH(level)->numberOfNodes);
     calcAMD<<<grid.grid, grid.threads>>>(
diff --git a/src/gpu/VirtualFluids_GPU/GPU/TurbulentViscosity.h b/src/gpu/VirtualFluids_GPU/GPU/TurbulentViscosity.h
index 293e61bcb39a1525b8fd19970aa9a1a2beea13f3..c0f104f408469ebb65bca5ada4c53ba9a6b13829 100644
--- a/src/gpu/VirtualFluids_GPU/GPU/TurbulentViscosity.h
+++ b/src/gpu/VirtualFluids_GPU/GPU/TurbulentViscosity.h
@@ -5,6 +5,6 @@
 
 class Parameter;
 
-extern "C" void calcTurbulentViscosityAMD(Parameter* para, int level);
+void calcTurbulentViscosityAMD(Parameter* para, int level);
 
 #endif //TURBULENT_VISCOSITY_H_
\ No newline at end of file
diff --git a/src/gpu/VirtualFluids_GPU/GPU/VelocityBCs27.cu b/src/gpu/VirtualFluids_GPU/GPU/VelocityBCs27.cu
index 31cf9947e3112222d07a32a58abb9c7da9aed4c0..edaa7e16017bbf56da61d07082f7b1d3262d254d 100644
--- a/src/gpu/VirtualFluids_GPU/GPU/VelocityBCs27.cu
+++ b/src/gpu/VirtualFluids_GPU/GPU/VelocityBCs27.cu
@@ -16,7 +16,7 @@ using namespace vf::lbm::constant;
 using namespace vf::lbm::dir;
 
 //////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void QVelDeviceCompPlusSlip27(
+__global__ void QVelDeviceCompPlusSlip27(
 													real* vx,
 													real* vy,
 													real* vz,
@@ -553,7 +553,7 @@ extern "C" __global__ void QVelDeviceCompPlusSlip27(
 
 
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void QVeloDeviceEQ27(real* VeloX,
+__global__ void QVeloDeviceEQ27(real* VeloX,
 										   real* VeloY,
 										   real* VeloZ,
                                            real* DD, 
@@ -833,7 +833,7 @@ extern "C" __global__ void QVeloDeviceEQ27(real* VeloX,
 
 
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void QVeloStreetDeviceEQ27(
+__global__ void QVeloStreetDeviceEQ27(
 	real* veloXfraction,
 	real* veloYfraction,
 	int*  naschVelo,
@@ -1119,8 +1119,7 @@ extern "C" __global__ void QVeloStreetDeviceEQ27(
 
 
 //////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void QVelDeviceIncompHighNu27(int inx,
-													int iny,
+__global__ void QVelDeviceIncompHighNu27(
 													real* vx,
 													real* vy,
 													real* vz,
@@ -1618,7 +1617,7 @@ extern "C" __global__ void QVelDeviceIncompHighNu27(int inx,
 
 
 //////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void QVelDeviceCompHighNu27(
+__global__ void QVelDeviceCompHighNu27(
 													real* vx,
 													real* vy,
 													real* vz,
@@ -2194,7 +2193,7 @@ extern "C" __global__ void QVelDeviceCompHighNu27(
 
 
 //////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void QVelDeviceCompZeroPress27(
+__global__ void QVelDeviceCompZeroPress27(
 														real* velocityX,
 														real* velocityY,
 														real* velocityZ,
@@ -2620,7 +2619,7 @@ extern "C" __global__ void QVelDeviceCompZeroPress27(
 
 
 //////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void QVelDeviceCompZeroPress1h27( int inx,
+__global__ void QVelDeviceCompZeroPress1h27( int inx,
 														int iny,
 														real* vx,
 														real* vy,
@@ -3091,7 +3090,7 @@ extern "C" __global__ void QVelDeviceCompZeroPress1h27( int inx,
 
 
 //////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void LB_BC_Vel_West_27( int nx, 
+__global__ void LB_BC_Vel_West_27( int nx, 
                                               int ny, 
                                               int nz, 
                                               int itz, 
@@ -3414,7 +3413,7 @@ extern "C" __global__ void LB_BC_Vel_West_27( int nx,
 
 
 //////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void QVelDevPlainBB27(
+__global__ void QVelDevPlainBB27(
    real* velocityX,
    real* velocityY,
    real* velocityZ,
@@ -3605,7 +3604,7 @@ extern "C" __global__ void QVelDevPlainBB27(
 
 
 //////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void QVelDevCouette27(real* vx,
+__global__ void QVelDevCouette27(real* vx,
 											real* vy,
 	 										real* vz,
 											real* DD,
@@ -3965,7 +3964,7 @@ extern "C" __global__ void QVelDevCouette27(real* vx,
 
 
 //////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void QVelDev1h27( int inx,
+__global__ void QVelDev1h27( int inx,
 										int iny,
 										real* vx,
 										real* vy,
@@ -4748,7 +4747,7 @@ extern "C" __global__ void QVelDev1h27( int inx,
 
 
 //////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void QVelDeviceComp27(
+__global__ void QVelDeviceComp27(
 											real* velocityX,
 											real* velocityY,
 											real* velocityZ,
@@ -5171,7 +5170,7 @@ extern "C" __global__ void QVelDeviceComp27(
 
 
 //////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void QVelDevice27(int inx,
+__global__ void QVelDevice27(int inx,
                                         int iny,
                                         real* vx,
                                         real* vy,
@@ -5724,7 +5723,7 @@ extern "C" __global__ void QVelDevice27(int inx,
 
 
 ////////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void PropellerBC(unsigned int* neighborX,
+__global__ void PropellerBC(unsigned int* neighborX,
                                        unsigned int* neighborY,
                                        unsigned int* neighborZ,
                                        real* rho,
diff --git a/src/gpu/VirtualFluids_GPU/GPU/WaleCumulant27.cu b/src/gpu/VirtualFluids_GPU/GPU/WaleCumulant27.cu
index 064e9415ccd32c291fbee980b41a3b187d470eee..481a8557c5cf98cc30f5f240a8eeb17e04e17c88 100644
--- a/src/gpu/VirtualFluids_GPU/GPU/WaleCumulant27.cu
+++ b/src/gpu/VirtualFluids_GPU/GPU/WaleCumulant27.cu
@@ -11,7 +11,7 @@ using namespace vf::lbm::dir;
 
 
 ////////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void LB_Kernel_WaleBySoniMalav_Cum_AA2016_Comp_SP_27(
+__global__ void LB_Kernel_WaleBySoniMalav_Cum_AA2016_Comp_SP_27(
 	real omega_in,
 	unsigned int* bcMatD,
 	unsigned int* neighborX,
diff --git a/src/gpu/VirtualFluids_GPU/GPU/WallFunction.cu b/src/gpu/VirtualFluids_GPU/GPU/WallFunction.cu
index 6da362fafcd67e8a4ddadf9405f92e66bba4c7b4..eaeab07ee843ec065094958019580082650f2653 100644
--- a/src/gpu/VirtualFluids_GPU/GPU/WallFunction.cu
+++ b/src/gpu/VirtualFluids_GPU/GPU/WallFunction.cu
@@ -8,7 +8,7 @@ using namespace vf::lbm::dir;
 
 
 //////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void WallFunction27(
+__global__ void WallFunction27(
 										  real* vx,
 										  real* vy,
 										  real* vz,
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/AdvectionDiffusion/Compressible/Mod27/ADComp27/ADComp27_Device.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/AdvectionDiffusion/Compressible/Mod27/ADComp27/ADComp27_Device.cu
index 3ccba979c6393365d32209a8d7d82c0ffd7dce56..4c4c1839d57b5af5b06d156747531c5e890e93c3 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/AdvectionDiffusion/Compressible/Mod27/ADComp27/ADComp27_Device.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/AdvectionDiffusion/Compressible/Mod27/ADComp27/ADComp27_Device.cu
@@ -6,7 +6,7 @@ using namespace vf::lbm::constant;
 using namespace vf::lbm::dir;
 #include "math.h"
 
-extern "C" __global__ void LB_KERNEL_AD_COMP_27(real diffusivity,
+__global__ void LB_KERNEL_AD_COMP_27(real diffusivity,
 	unsigned int* bcMatD,
 	unsigned int* neighborX,
 	unsigned int* neighborY,
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/AdvectionDiffusion/Compressible/Mod27/ADComp27/ADComp27_Device.cuh b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/AdvectionDiffusion/Compressible/Mod27/ADComp27/ADComp27_Device.cuh
index e96bccd492e57cbfb23298e2587648881d9d980c..a5482a10ca15fc3f27245acbe8b47a06d9f917f0 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/AdvectionDiffusion/Compressible/Mod27/ADComp27/ADComp27_Device.cuh
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/AdvectionDiffusion/Compressible/Mod27/ADComp27/ADComp27_Device.cuh
@@ -4,7 +4,7 @@
 #include <DataTypes.h>
 #include <curand.h>
 
-extern "C" __global__ void LB_KERNEL_AD_COMP_27(real diffusivity,
+__global__ void LB_KERNEL_AD_COMP_27(real diffusivity,
 	unsigned int* bcMatD,
 	unsigned int* neighborX,
 	unsigned int* neighborY,
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/AdvectionDiffusion/Compressible/Mod7/ADComp7/ADComp7_Device.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/AdvectionDiffusion/Compressible/Mod7/ADComp7/ADComp7_Device.cu
index 561ea4924be31c0ac8b6da822a8720336b163210..f85f3098b44c05e0b4864ae886eb99b6ebee148c 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/AdvectionDiffusion/Compressible/Mod7/ADComp7/ADComp7_Device.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/AdvectionDiffusion/Compressible/Mod7/ADComp7/ADComp7_Device.cu
@@ -7,7 +7,7 @@ using namespace vf::lbm::dir;
 
 #include "math.h"
 
-extern "C" __global__ void LB_Kernel_AD_Comp_7(real diffusivity,
+__global__ void LB_Kernel_AD_Comp_7(real diffusivity,
 	unsigned int* bcMatD,
 	unsigned int* neighborX,
 	unsigned int* neighborY,
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/AdvectionDiffusion/Compressible/Mod7/ADComp7/ADComp7_Device.cuh b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/AdvectionDiffusion/Compressible/Mod7/ADComp7/ADComp7_Device.cuh
index 01e64563499a3a4d0a515beee7c95cba75949640..bea40443ab619fb08b4c5656105c7792c4f11bd1 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/AdvectionDiffusion/Compressible/Mod7/ADComp7/ADComp7_Device.cuh
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/AdvectionDiffusion/Compressible/Mod7/ADComp7/ADComp7_Device.cuh
@@ -4,7 +4,7 @@
 #include <DataTypes.h>
 #include <curand.h>
 
-extern "C" __global__ void LB_Kernel_AD_Comp_7(real diffusivity,
+__global__ void LB_Kernel_AD_Comp_7(real diffusivity,
 	unsigned int* bcMatD,
 	unsigned int* neighborX,
 	unsigned int* neighborY,
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/AdvectionDiffusion/Incompressible/Mod27/ADIncomp27/ADIncomp27_Device.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/AdvectionDiffusion/Incompressible/Mod27/ADIncomp27/ADIncomp27_Device.cu
index f64b5f7681680f4c3b396964409a6568d8f97d0c..552b15ef2aa249a0f7cfffc3b40fe45429e3174a 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/AdvectionDiffusion/Incompressible/Mod27/ADIncomp27/ADIncomp27_Device.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/AdvectionDiffusion/Incompressible/Mod27/ADIncomp27/ADIncomp27_Device.cu
@@ -6,7 +6,7 @@ using namespace vf::lbm::constant;
 using namespace vf::lbm::dir;
 #include "math.h"
 
-extern "C" __global__ void LB_Kernel_AD_Incomp_27(real diffusivity,
+__global__ void LB_Kernel_AD_Incomp_27(real diffusivity,
 	unsigned int* bcMatD,
 	unsigned int* neighborX,
 	unsigned int* neighborY,
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/AdvectionDiffusion/Incompressible/Mod27/ADIncomp27/ADIncomp27_Device.cuh b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/AdvectionDiffusion/Incompressible/Mod27/ADIncomp27/ADIncomp27_Device.cuh
index 9833a2bcdd0b13cbf229f97e43eced4c87009242..a6d94de4fadb9a93a9e5fed63d87731b12ec2a07 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/AdvectionDiffusion/Incompressible/Mod27/ADIncomp27/ADIncomp27_Device.cuh
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/AdvectionDiffusion/Incompressible/Mod27/ADIncomp27/ADIncomp27_Device.cuh
@@ -4,7 +4,7 @@
 #include <DataTypes.h>
 #include <curand.h>
 
-extern "C" __global__ void LB_Kernel_AD_Incomp_27(real diffusivity,
+__global__ void LB_Kernel_AD_Incomp_27(real diffusivity,
 	unsigned int* bcMatD,
 	unsigned int* neighborX,
 	unsigned int* neighborY,
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/AdvectionDiffusion/Incompressible/Mod7/ADIncomp7/ADIncomp7_Device.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/AdvectionDiffusion/Incompressible/Mod7/ADIncomp7/ADIncomp7_Device.cu
index 0cad7d9cdffc85f744dcc65ac38505b50c3befca..f8482183dd4ce01340c1d33db2fbadc1f7c873ef 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/AdvectionDiffusion/Incompressible/Mod7/ADIncomp7/ADIncomp7_Device.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/AdvectionDiffusion/Incompressible/Mod7/ADIncomp7/ADIncomp7_Device.cu
@@ -6,7 +6,7 @@ using namespace vf::lbm::constant;
 using namespace vf::lbm::dir;
 #include "math.h"
 
-extern "C" __global__ void LB_Kernel_AD_Incomp_7(real diffusivity,
+__global__ void LB_Kernel_AD_Incomp_7(real diffusivity,
 	unsigned int* bcMatD,
 	unsigned int* neighborX,
 	unsigned int* neighborY,
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/AdvectionDiffusion/Incompressible/Mod7/ADIncomp7/ADIncomp7_Device.cuh b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/AdvectionDiffusion/Incompressible/Mod7/ADIncomp7/ADIncomp7_Device.cuh
index fe489784861839d778d65e9ddf14ed449a9ad7e0..25a17ddbd7038635a2beb2c39212822cbf762034 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/AdvectionDiffusion/Incompressible/Mod7/ADIncomp7/ADIncomp7_Device.cuh
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/AdvectionDiffusion/Incompressible/Mod7/ADIncomp7/ADIncomp7_Device.cuh
@@ -4,7 +4,7 @@
 #include <DataTypes.h>
 #include <curand.h>
 
-extern "C" __global__ void LB_Kernel_AD_Incomp_7(real diffusivity,
+__global__ void LB_Kernel_AD_Incomp_7(real diffusivity,
 	unsigned int* bcMatD,
 	unsigned int* neighborX,
 	unsigned int* neighborY,
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/BGK/BGKCompSP27_Device.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/BGK/BGKCompSP27_Device.cu
index a9cf249541b2e42768c0ad72611f3f7626b71b5b..24489cffd4d172c5355f84a1549a2f38304d0e86 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/BGK/BGKCompSP27_Device.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/BGK/BGKCompSP27_Device.cu
@@ -7,7 +7,7 @@ using namespace vf::lbm::dir;
 #include "math.h"
 
 
-extern "C" __global__ void LB_Kernel_BGK_Comp_SP_27(	real omega,
+__global__ void LB_Kernel_BGK_Comp_SP_27(	real omega,
 														unsigned int* bcMatD,
 														unsigned int* neighborX,
 														unsigned int* neighborY,
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/BGK/BGKCompSP27_Device.cuh b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/BGK/BGKCompSP27_Device.cuh
index fb6ef2eea656f29e9e788ca80dbf8aef801055d5..59a5240862ed92a9ea3e9187c503ee9233da7e5a 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/BGK/BGKCompSP27_Device.cuh
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/BGK/BGKCompSP27_Device.cuh
@@ -4,7 +4,7 @@
 #include <DataTypes.h>
 #include <curand.h>
 
-extern "C" __global__ void LB_Kernel_BGK_Comp_SP_27(	real omega,
+__global__ void LB_Kernel_BGK_Comp_SP_27(	real omega,
 														unsigned int* bcMatD,
 														unsigned int* neighborX,
 														unsigned int* neighborY,
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/BGKPlus/BGKPlusCompSP27_Device.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/BGKPlus/BGKPlusCompSP27_Device.cu
index 368785c5cc50bb9edc30cda9129fe655278f2f0d..9031539d6cf293ba60fdb4054344275b853367fd 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/BGKPlus/BGKPlusCompSP27_Device.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/BGKPlus/BGKPlusCompSP27_Device.cu
@@ -6,7 +6,7 @@ using namespace vf::lbm::constant;
 using namespace vf::lbm::dir;
 #include "math.h"
 
-extern "C" __global__ void LB_Kernel_BGK_Plus_Comp_SP_27(
+__global__ void LB_Kernel_BGK_Plus_Comp_SP_27(
 	real omega,
 	unsigned int* bcMatD,
 	unsigned int* neighborX,
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/BGKPlus/BGKPlusCompSP27_Device.cuh b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/BGKPlus/BGKPlusCompSP27_Device.cuh
index 82e41dd80ab03e0d7ce8cee854fffa009c7fefee..9e991ffa4b16e0df78fe23f6ee5a1e0678919cd7 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/BGKPlus/BGKPlusCompSP27_Device.cuh
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/BGKPlus/BGKPlusCompSP27_Device.cuh
@@ -4,7 +4,7 @@
 #include <DataTypes.h>
 #include <curand.h>
 
-extern "C" __global__ void LB_Kernel_BGK_Plus_Comp_SP_27(	real omega,
+__global__ void LB_Kernel_BGK_Plus_Comp_SP_27(	real omega,
 															unsigned int* bcMatD,
 															unsigned int* neighborX,
 															unsigned int* neighborY,
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/Cascade/CascadeCompSP27_Device.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/Cascade/CascadeCompSP27_Device.cu
index ef29cf9593578696f74bbd7648f99f0b6038a12c..5183f35c8f4fc27c35e9849e6e8eb1f0995dcf80 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/Cascade/CascadeCompSP27_Device.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/Cascade/CascadeCompSP27_Device.cu
@@ -6,7 +6,7 @@ using namespace vf::lbm::constant;
 using namespace vf::lbm::dir;
 #include "math.h"
 
-extern "C" __global__ void LB_Kernel_Cascade_Comp_SP_27(real omega,
+__global__ void LB_Kernel_Cascade_Comp_SP_27(real omega,
 	unsigned int* bcMatD,
 	unsigned int* neighborX,
 	unsigned int* neighborY,
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/Cascade/CascadeCompSP27_Device.cuh b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/Cascade/CascadeCompSP27_Device.cuh
index 6c9f8852ae7b0aab2b82897cb5f1905f780b983d..8a49bd02af3ab420b42bc257e8668dd3ff9eca2c 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/Cascade/CascadeCompSP27_Device.cuh
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/Cascade/CascadeCompSP27_Device.cuh
@@ -4,7 +4,7 @@
 #include <DataTypes.h>
 #include <curand.h>
 
-extern "C" __global__ void LB_Kernel_Cascade_Comp_SP_27(real s9,
+__global__ void LB_Kernel_Cascade_Comp_SP_27(real s9,
 	unsigned int* bcMatD,
 	unsigned int* neighborX,
 	unsigned int* neighborY,
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/Cumulant/CumulantCompSP27_Device.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/Cumulant/CumulantCompSP27_Device.cu
index 422d642def822e6e37dbb7efbcb9a1439af677fd..924b6e7fb289e31d2b55ff66f2080fd61242c62b 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/Cumulant/CumulantCompSP27_Device.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/Cumulant/CumulantCompSP27_Device.cu
@@ -6,7 +6,7 @@ using namespace vf::lbm::constant;
 using namespace vf::lbm::dir;
 #include "math.h"
 
-extern "C" __global__ void LB_Kernel_Cum_Comp_SP_27(real omega,
+__global__ void LB_Kernel_Cum_Comp_SP_27(real omega,
 	unsigned int* bcMatD,
 	unsigned int* neighborX,
 	unsigned int* neighborY,
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/Cumulant/CumulantCompSP27_Device.cuh b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/Cumulant/CumulantCompSP27_Device.cuh
index 7146787201baf348c84b593772185dfa4d1cd506..cf6a926698e4082383b04c5f1e2d886c6dca6380 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/Cumulant/CumulantCompSP27_Device.cuh
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/Cumulant/CumulantCompSP27_Device.cuh
@@ -4,7 +4,7 @@
 #include <DataTypes.h>
 #include <curand.h>
 
-extern "C" __global__ void LB_Kernel_Cum_Comp_SP_27(real s9,
+__global__ void LB_Kernel_Cum_Comp_SP_27(real s9,
 	unsigned int* bcMatD,
 	unsigned int* neighborX,
 	unsigned int* neighborY,
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantAll4/CumulantAll4CompSP27_Device.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantAll4/CumulantAll4CompSP27_Device.cu
index a52e771c2b5294eee72c9c81d29b8f2e8de7e49e..bca76efa2e1bc118576750e637e69dbce95bdc50 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantAll4/CumulantAll4CompSP27_Device.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantAll4/CumulantAll4CompSP27_Device.cu
@@ -8,7 +8,7 @@ using namespace vf::lbm::constant;
 using namespace vf::lbm::dir;
 #include "math.h"
 
-extern "C" __global__ void LB_Kernel_Cumulant_D3Q27All4(	real omega,
+__global__ void LB_Kernel_Cumulant_D3Q27All4(	real omega,
 															unsigned int* bcMatD,
 															unsigned int* neighborX,
 															unsigned int* neighborY,
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantAll4/CumulantAll4CompSP27_Device.cuh b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantAll4/CumulantAll4CompSP27_Device.cuh
index 27f981228c1e2be8e29d176d0a234534c1e61b5b..5f23194d561d106cf2493c36199444f8da15efd7 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantAll4/CumulantAll4CompSP27_Device.cuh
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantAll4/CumulantAll4CompSP27_Device.cuh
@@ -4,7 +4,7 @@
 #include <DataTypes.h>
 #include <curand.h>
 
-extern "C" __global__ void LB_Kernel_Cumulant_D3Q27All4(	real omega,
+__global__ void LB_Kernel_Cumulant_D3Q27All4(	real omega,
 															unsigned int* bcMatD,
 															unsigned int* neighborX,
 															unsigned int* neighborY,
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK15/CumulantK15Comp_Device.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK15/CumulantK15Comp_Device.cu
index a8ec8d7e25beb6b0f88c53ec9b2eade11f760b4b..9d63c0e7f33af57922755666d36fe3eaac8558fe 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK15/CumulantK15Comp_Device.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK15/CumulantK15Comp_Device.cu
@@ -6,7 +6,7 @@ using namespace vf::lbm::constant;
 using namespace vf::lbm::dir;
 #include "math.h"
 
-extern "C" __global__ void LB_Kernel_CumulantK15Comp(real omega,
+__global__ void LB_Kernel_CumulantK15Comp(real omega,
 	unsigned int* bcMatD,
 	unsigned int* neighborX,
 	unsigned int* neighborY,
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK15/CumulantK15Comp_Device.cuh b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK15/CumulantK15Comp_Device.cuh
index 208fbec553507812bfe4339577734292b248c027..149aab34a16673ddd707427c222a56cf18d127ca 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK15/CumulantK15Comp_Device.cuh
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK15/CumulantK15Comp_Device.cuh
@@ -4,7 +4,7 @@
 #include <DataTypes.h>
 #include <curand.h>
 
-extern "C" __global__ void LB_Kernel_CumulantK15Comp(	real omega,
+__global__ void LB_Kernel_CumulantK15Comp(	real omega,
 														unsigned int* bcMatD,
 														unsigned int* neighborX,
 														unsigned int* neighborY,
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK15Bulk/CumulantK15BulkComp_Device.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK15Bulk/CumulantK15BulkComp_Device.cu
index de7f5ce74c4dfd5ea68bb6bdd45fcc3d9fa3ab53..227747045cc2af9b3a7b581393412e04fb4d38c3 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK15Bulk/CumulantK15BulkComp_Device.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK15Bulk/CumulantK15BulkComp_Device.cu
@@ -6,7 +6,7 @@ using namespace vf::lbm::constant;
 using namespace vf::lbm::dir;
 #include "math.h"
 
-extern "C" __global__ void LB_Kernel_CumulantK15BulkComp(real omega,
+__global__ void LB_Kernel_CumulantK15BulkComp(real omega,
 	unsigned int* bcMatD,
 	unsigned int* neighborX,
 	unsigned int* neighborY,
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK15Bulk/CumulantK15BulkComp_Device.cuh b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK15Bulk/CumulantK15BulkComp_Device.cuh
index caeb68e3077393bc786a022eeb3c60e6d0a3c059..e1d60ab0a181b88fba81bdb5ff649d7971c235a7 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK15Bulk/CumulantK15BulkComp_Device.cuh
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK15Bulk/CumulantK15BulkComp_Device.cuh
@@ -5,7 +5,7 @@
 #include <curand.h>
 
 
-extern "C" __global__ void LB_Kernel_CumulantK15BulkComp(real omega,
+__global__ void LB_Kernel_CumulantK15BulkComp(real omega,
 	unsigned int* bcMatD,
 	unsigned int* neighborX,
 	unsigned int* neighborY,
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK15Sponge/CumulantK15SpongeComp_Device.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK15Sponge/CumulantK15SpongeComp_Device.cu
index a65aa95802971e40af9f50c93fda578d4be24ebb..f046c3343a30317600f2f95e001233d6bd0dbb98 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK15Sponge/CumulantK15SpongeComp_Device.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK15Sponge/CumulantK15SpongeComp_Device.cu
@@ -6,7 +6,7 @@ using namespace vf::lbm::constant;
 using namespace vf::lbm::dir;
 #include "math.h"
 
-extern "C" __global__ void LB_Kernel_CumulantK15SpongeComp(real omegaIn,
+__global__ void LB_Kernel_CumulantK15SpongeComp(real omegaIn,
 	unsigned int* bcMatD,
 	unsigned int* neighborX,
 	unsigned int* neighborY,
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK15Sponge/CumulantK15SpongeComp_Device.cuh b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK15Sponge/CumulantK15SpongeComp_Device.cuh
index a55ec0ada7e0a05d05285bc29406d1a497467a54..3a526112e56947a77d5ac337052d62e95dd4e578 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK15Sponge/CumulantK15SpongeComp_Device.cuh
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK15Sponge/CumulantK15SpongeComp_Device.cuh
@@ -4,7 +4,7 @@
 #include <DataTypes.h>
 #include <curand.h>
 
-extern "C" __global__ void LB_Kernel_CumulantK15SpongeComp(real omega,
+__global__ void LB_Kernel_CumulantK15SpongeComp(real omega,
 	unsigned int* bcMatD,
 	unsigned int* neighborX,
 	unsigned int* neighborY,
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17/CumulantK17Comp_Device.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17/CumulantK17Comp_Device.cu
index 9bc2bb5877197ab21c2cd82b4b672c8660a12211..cea958854f4d891092ffb0899ec8e552da456acd 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17/CumulantK17Comp_Device.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17/CumulantK17Comp_Device.cu
@@ -7,7 +7,7 @@ using namespace vf::lbm::dir;
 #include "math.h"
 
 
-extern "C" __global__ void LB_Kernel_CumulantK17Comp(real omega,
+__global__ void LB_Kernel_CumulantK17Comp(real omega,
 	unsigned int* bcMatD,
 	unsigned int* neighborX,
 	unsigned int* neighborY,
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17/CumulantK17Comp_Device.cuh b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17/CumulantK17Comp_Device.cuh
index 4e6fea77b1e0364b5f56d2fd560d245f60343fe1..f44842057d554498b0b5d4c733e2425e524a3b75 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17/CumulantK17Comp_Device.cuh
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17/CumulantK17Comp_Device.cuh
@@ -4,7 +4,7 @@
 #include <DataTypes.h>
 #include <curand.h>
 
-extern "C" __global__ void LB_Kernel_CumulantK17Comp(	real omega,
+__global__ void LB_Kernel_CumulantK17Comp(	real omega,
 														unsigned int* bcMatD,
 														unsigned int* neighborX,
 														unsigned int* neighborY,
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17Bulk/CumulantK17BulkComp_Device.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17Bulk/CumulantK17BulkComp_Device.cu
index dce86433b943a54192a41d5ea1831b4e3fe76ce1..fb91946f69fdddf850f6c69e15750f33b9e46cf7 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17Bulk/CumulantK17BulkComp_Device.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17Bulk/CumulantK17BulkComp_Device.cu
@@ -6,7 +6,7 @@ using namespace vf::lbm::constant;
 using namespace vf::lbm::dir;
 #include "math.h"
 
-extern "C" __global__ void LB_Kernel_CumulantK17BulkComp(real omega,
+__global__ void LB_Kernel_CumulantK17BulkComp(real omega,
 	unsigned int* bcMatD,
 	unsigned int* neighborX,
 	unsigned int* neighborY,
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17Bulk/CumulantK17BulkComp_Device.cuh b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17Bulk/CumulantK17BulkComp_Device.cuh
index d7f1f263be6664a6d39c57c98ba63699da662c2b..04448787256cb1cfeef46c5d9b7146918e6a4c38 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17Bulk/CumulantK17BulkComp_Device.cuh
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17Bulk/CumulantK17BulkComp_Device.cuh
@@ -4,7 +4,7 @@
 #include <DataTypes.h>
 #include <curand.h>
 
-extern "C" __global__ void LB_Kernel_CumulantK17BulkComp(real omega,
+__global__ void LB_Kernel_CumulantK17BulkComp(real omega,
 	unsigned int* bcMatD,
 	unsigned int* neighborX,
 	unsigned int* neighborY,
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17chim/CumulantK17CompChim_Device.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17chim/CumulantK17CompChim_Device.cu
index 6aab2076860fd2ea22cec8c3948ae2639f5c1780..a0a79b1a0ea2451fd39ec12e973fc2143cb8c8ad 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17chim/CumulantK17CompChim_Device.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17chim/CumulantK17CompChim_Device.cu
@@ -40,7 +40,7 @@ using namespace vf::lbm::dir;
 #include "Kernel/ChimeraTransformation.h"
 
 ////////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void LB_Kernel_CumulantK17CompChim(
+__global__ void LB_Kernel_CumulantK17CompChim(
 	real omega,
 	uint* typeOfGridNode,
 	uint* neighborX,
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17chim/CumulantK17CompChim_Device.cuh b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17chim/CumulantK17CompChim_Device.cuh
index 1d42d65f020dd7393321498666a298630883f6ad..a480278652ca3bae0122d33b2655e2210d203727 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17chim/CumulantK17CompChim_Device.cuh
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17chim/CumulantK17CompChim_Device.cuh
@@ -4,7 +4,7 @@
 #include <DataTypes.h>
 #include <curand.h>
 
-extern "C" __global__ void LB_Kernel_CumulantK17CompChim(
+__global__ void LB_Kernel_CumulantK17CompChim(
 	real omega,
 	uint* typeOfGridNode,
 	uint* neighborX,
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17chimStream/CumulantK17CompChimStreamDevice.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17chimStream/CumulantK17CompChimStreamDevice.cu
index aab6c580396efcac8056c355d5047a3062c750d1..d9498f5573bb6ebb2d4ec90b9c9d2d4a572ea45c 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17chimStream/CumulantK17CompChimStreamDevice.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17chimStream/CumulantK17CompChimStreamDevice.cu
@@ -40,7 +40,7 @@ using namespace vf::lbm::dir;
 #include "Kernel/ChimeraTransformation.h"
 
 ////////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void LB_Kernel_CumulantK17CompChimStream(
+__global__ void LB_Kernel_CumulantK17CompChimStream(
 	real omega,
 	uint* neighborX,
 	uint* neighborY,
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17chimStream/CumulantK17CompChimStream_Device.cuh b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17chimStream/CumulantK17CompChimStream_Device.cuh
index 035e438ccf8df70c5df43c70f0b4a5ffe160acc8..f74192c0423ba9dc96820d7f46eecb9d49a39ed4 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17chimStream/CumulantK17CompChimStream_Device.cuh
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17chimStream/CumulantK17CompChimStream_Device.cuh
@@ -4,7 +4,7 @@
 #include <DataTypes.h>
 #include <curand.h>
 
-extern "C" __global__ void LB_Kernel_CumulantK17CompChimStream(
+__global__ void LB_Kernel_CumulantK17CompChimStream(
 	real omega,
 	uint* neighborX,
 	uint* neighborY,
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK18/CumulantK18Comp_Device.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK18/CumulantK18Comp_Device.cu
index 66170ecb09effc282c17ea42d3a2e4a07b8313b6..9b43aec896cd4179fd17c7b5cd9252c69939cdce 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK18/CumulantK18Comp_Device.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK18/CumulantK18Comp_Device.cu
@@ -6,7 +6,7 @@ using namespace vf::lbm::constant;
 using namespace vf::lbm::dir;
 #include "math.h"
 
-extern "C" __global__ void LB_Kernel_CumulantK18Comp(
+__global__ void LB_Kernel_CumulantK18Comp(
 	real omega,
 	unsigned int* bcMatD,
 	unsigned int* neighborX,
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK18/CumulantK18Comp_Device.cuh b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK18/CumulantK18Comp_Device.cuh
index 1d2358370ecd9baabacdda345973324e7e2382f7..60a15145e3c117cadb7485f2899ba768b10eb0c1 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK18/CumulantK18Comp_Device.cuh
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK18/CumulantK18Comp_Device.cuh
@@ -4,7 +4,7 @@
 #include <DataTypes.h>
 #include <curand.h>
 
-extern "C" __global__ void LB_Kernel_CumulantK18Comp(	real omega,
+__global__ void LB_Kernel_CumulantK18Comp(	real omega,
 														unsigned int* bcMatD,
 														unsigned int* neighborX,
 														unsigned int* neighborY,
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK20/CumulantK20Comp_Device.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK20/CumulantK20Comp_Device.cu
index acbca26e9cb37b74a59efe688143934acb65f335..91b695e98fc91dc73daf4d4b7ca194e036138924 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK20/CumulantK20Comp_Device.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK20/CumulantK20Comp_Device.cu
@@ -6,7 +6,7 @@ using namespace vf::lbm::constant;
 using namespace vf::lbm::dir;
 #include "math.h"
 
-extern "C" __global__ void LB_Kernel_CumulantK20Comp(
+__global__ void LB_Kernel_CumulantK20Comp(
 	real omega,
 	unsigned int* bcMatD,
 	unsigned int* neighborX,
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK20/CumulantK20Comp_Device.cuh b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK20/CumulantK20Comp_Device.cuh
index f6280969d1f501f69cbf912fca07b14cbe458b25..17691f621b5a46d29556d71304195f4a346a7ec6 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK20/CumulantK20Comp_Device.cuh
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK20/CumulantK20Comp_Device.cuh
@@ -4,7 +4,7 @@
 #include <DataTypes.h>
 #include <curand.h>
 
-extern "C" __global__ void LB_Kernel_CumulantK20Comp(	real omega,
+__global__ void LB_Kernel_CumulantK20Comp(	real omega,
 															unsigned int* bcMatD,
 															unsigned int* neighborX,
 															unsigned int* neighborY,
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/MRT/MRTCompSP27_Device.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/MRT/MRTCompSP27_Device.cu
index e25926fd8186bb1fdb09f8610bc5e4719614f645..b30924dbe4128922475201a67ca2172d51bb4bb0 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/MRT/MRTCompSP27_Device.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/MRT/MRTCompSP27_Device.cu
@@ -6,7 +6,7 @@ using namespace vf::lbm::constant;
 using namespace vf::lbm::dir;
 #include "math.h"
 
-extern "C" __global__ void LB_Kernel_MRT_Comp_SP_27(real omega,
+__global__ void LB_Kernel_MRT_Comp_SP_27(real omega,
 	unsigned int* bcMatD,
 	unsigned int* neighborX,
 	unsigned int* neighborY,
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/MRT/MRTCompSP27_Device.cuh b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/MRT/MRTCompSP27_Device.cuh
index 39b7562a28c39df06b9fe06a431001939c33deb6..947ce68259432efe87af84fd9986916e62521397 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/MRT/MRTCompSP27_Device.cuh
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/MRT/MRTCompSP27_Device.cuh
@@ -4,7 +4,7 @@
 #include <DataTypes.h>
 #include <curand.h>
 
-extern "C" __global__ void LB_Kernel_MRT_Comp_SP_27(real omega,
+__global__ void LB_Kernel_MRT_Comp_SP_27(real omega,
 	unsigned int* bcMatD,
 	unsigned int* neighborX,
 	unsigned int* neighborY,
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/BGK/BGKIncompSP27_Device.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/BGK/BGKIncompSP27_Device.cu
index db2314b67f8c4cdfea156bd76f6eee2c3cb11f59..a1151dbe0729bb244b8727b7497587875861be92 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/BGK/BGKIncompSP27_Device.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/BGK/BGKIncompSP27_Device.cu
@@ -6,7 +6,7 @@ using namespace vf::lbm::constant;
 using namespace vf::lbm::dir;
 #include "math.h"
 
-extern "C" __global__ void LB_Kernel_BGK_Incomp_SP_27(real omega,
+__global__ void LB_Kernel_BGK_Incomp_SP_27(real omega,
 	unsigned int* bcMatD,
 	unsigned int* neighborX,
 	unsigned int* neighborY,
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/BGK/BGKIncompSP27_Device.cuh b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/BGK/BGKIncompSP27_Device.cuh
index 20cdefb5a6acd961b9f93ca5cb759bfe742dc990..f1a90b45238a2df4d93860d7e77cb1242b9fbd90 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/BGK/BGKIncompSP27_Device.cuh
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/BGK/BGKIncompSP27_Device.cuh
@@ -4,7 +4,7 @@
 #include <DataTypes.h>
 #include <curand.h>
 
-extern "C" __global__ void LB_Kernel_BGK_Incomp_SP_27(real omega,
+__global__ void LB_Kernel_BGK_Incomp_SP_27(real omega,
 	unsigned int* bcMatD,
 	unsigned int* neighborX,
 	unsigned int* neighborY,
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/BGKPlus/BGKPlusIncompSP27_Device.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/BGKPlus/BGKPlusIncompSP27_Device.cu
index ab28fc544e66de7e31050ab125c9ca7bef40e260..1f29867de8204fa642993326fd50095dff40b539 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/BGKPlus/BGKPlusIncompSP27_Device.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/BGKPlus/BGKPlusIncompSP27_Device.cu
@@ -6,7 +6,7 @@ using namespace vf::lbm::constant;
 using namespace vf::lbm::dir;
 #include "math.h"
 
-extern "C" __global__ void LB_Kernel_BGK_Plus_Incomp_SP_27(real omega,
+__global__ void LB_Kernel_BGK_Plus_Incomp_SP_27(real omega,
 	unsigned int* bcMatD,
 	unsigned int* neighborX,
 	unsigned int* neighborY,
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/BGKPlus/BGKPlusIncompSP27_Device.cuh b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/BGKPlus/BGKPlusIncompSP27_Device.cuh
index 327ea4ddc933ccf5e9e75a7fd6d44e4d59b86d56..7f85f3ca29d4d8d7620e2503df9947fd7e42fe8f 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/BGKPlus/BGKPlusIncompSP27_Device.cuh
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/BGKPlus/BGKPlusIncompSP27_Device.cuh
@@ -4,7 +4,7 @@
 #include <DataTypes.h>
 #include <curand.h>
 
-extern "C" __global__ void LB_Kernel_BGK_Plus_Incomp_SP_27(real omega,
+__global__ void LB_Kernel_BGK_Plus_Incomp_SP_27(real omega,
 	unsigned int* bcMatD,
 	unsigned int* neighborX,
 	unsigned int* neighborY,
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/Cascade/CascadeIncompSP27_Device.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/Cascade/CascadeIncompSP27_Device.cu
index d88f7492c9bf366a57155826fb57d328816cc2b2..adc89198fde079c1c559de85758967aad3e385bd 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/Cascade/CascadeIncompSP27_Device.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/Cascade/CascadeIncompSP27_Device.cu
@@ -6,7 +6,7 @@ using namespace vf::lbm::constant;
 using namespace vf::lbm::dir;
 #include "math.h"
 
-extern "C" __global__ void LB_Kernel_Cascade_Incomp_SP_27(real omega,
+__global__ void LB_Kernel_Cascade_Incomp_SP_27(real omega,
 	unsigned int* bcMatD,
 	unsigned int* neighborX,
 	unsigned int* neighborY,
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/Cascade/CascadeIncompSP27_Device.cuh b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/Cascade/CascadeIncompSP27_Device.cuh
index c9f6ec42f8ce1512e84c193484c48e78d827f71a..a531fa7bd64b9782f43f800b29fe666504612f1a 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/Cascade/CascadeIncompSP27_Device.cuh
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/Cascade/CascadeIncompSP27_Device.cuh
@@ -4,7 +4,7 @@
 #include <DataTypes.h>
 #include <curand.h>
 
-extern "C" __global__ void LB_Kernel_Cascade_Incomp_SP_27(real s9,
+__global__ void LB_Kernel_Cascade_Incomp_SP_27(real s9,
 	unsigned int* bcMatD,
 	unsigned int* neighborX,
 	unsigned int* neighborY,
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/Cumulant1hSP27/Cumulant1hIncompSP27_Device.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/Cumulant1hSP27/Cumulant1hIncompSP27_Device.cu
index 9d3fe2781a4cc3fabb4ee24a5b980360ab324b94..3f3dff25e22f14a398ca9deb308dd254073cea8b 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/Cumulant1hSP27/Cumulant1hIncompSP27_Device.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/Cumulant1hSP27/Cumulant1hIncompSP27_Device.cu
@@ -6,7 +6,7 @@ using namespace vf::lbm::constant;
 using namespace vf::lbm::dir;
 #include "math.h"
 
-extern "C" __global__ void LB_Kernel_Cum_1h_Incomp_SP_27(real omega,
+__global__ void LB_Kernel_Cum_1h_Incomp_SP_27(real omega,
 	real deltaPhi,
 	real angularVelocity,
 	unsigned int* bcMatD,
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/Cumulant1hSP27/Cumulant1hIncompSP27_Device.cuh b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/Cumulant1hSP27/Cumulant1hIncompSP27_Device.cuh
index 3117d4e1e34d1d086711e9c5f1f2f6ecf9c4705f..0679e770f244ad2e2c59c9cacf9d3524a640a42e 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/Cumulant1hSP27/Cumulant1hIncompSP27_Device.cuh
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/Cumulant1hSP27/Cumulant1hIncompSP27_Device.cuh
@@ -4,7 +4,7 @@
 #include <DataTypes.h>
 #include <curand.h>
 
-extern "C" __global__ void LB_Kernel_Cum_1h_Incomp_SP_27(real omega,
+__global__ void LB_Kernel_Cum_1h_Incomp_SP_27(real omega,
 	real deltaPhi,
 	real angularVelocity,
 	unsigned int* bcMatD,
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/CumulantIsoSP27/CumulantIsoIncompSP27_Device.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/CumulantIsoSP27/CumulantIsoIncompSP27_Device.cu
index bfcb586e71f96b0f92d9a0331c38f06070848adb..0402fa1790a32eb15fea790bac169a7725bc2818 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/CumulantIsoSP27/CumulantIsoIncompSP27_Device.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/CumulantIsoSP27/CumulantIsoIncompSP27_Device.cu
@@ -6,7 +6,7 @@ using namespace vf::lbm::constant;
 using namespace vf::lbm::dir;
 #include "math.h"
 
-extern "C" __global__ void LB_Kernel_Cum_IsoTest_Incomp_SP_27(real omega,
+__global__ void LB_Kernel_Cum_IsoTest_Incomp_SP_27(real omega,
 	unsigned int* bcMatD,
 	unsigned int* neighborX,
 	unsigned int* neighborY,
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/CumulantIsoSP27/CumulantIsoIncompSP27_Device.cuh b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/CumulantIsoSP27/CumulantIsoIncompSP27_Device.cuh
index 9ed2f167ecb199429c7497acc72f298db41a0dc2..57dc7180ce7900333b4071db409a03ac847dd641 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/CumulantIsoSP27/CumulantIsoIncompSP27_Device.cuh
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/CumulantIsoSP27/CumulantIsoIncompSP27_Device.cuh
@@ -4,7 +4,7 @@
 #include <DataTypes.h>
 #include <curand.h>
 
-extern "C" __global__ void LB_Kernel_Cum_IsoTest_Incomp_SP_27(real omega,
+__global__ void LB_Kernel_Cum_IsoTest_Incomp_SP_27(real omega,
 	unsigned int* bcMatD,
 	unsigned int* neighborX,
 	unsigned int* neighborY,
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/CumulantK15/CumulantK15Incomp_Device.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/CumulantK15/CumulantK15Incomp_Device.cu
index 1727427aef2de70c3810cf08e68128c00e4aab5a..5a5c86a5da93c99c8743d0190bfae4ec32866731 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/CumulantK15/CumulantK15Incomp_Device.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/CumulantK15/CumulantK15Incomp_Device.cu
@@ -6,7 +6,7 @@ using namespace vf::lbm::constant;
 using namespace vf::lbm::dir;
 #include "math.h"
 
-extern "C" __global__ void LB_Kernel_CumulantK15Incomp(real omega,
+__global__ void LB_Kernel_CumulantK15Incomp(real omega,
 	unsigned int* bcMatD,
 	unsigned int* neighborX,
 	unsigned int* neighborY,
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/CumulantK15/CumulantK15Incomp_Device.cuh b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/CumulantK15/CumulantK15Incomp_Device.cuh
index 112e9f7a99bf683e8bacb348633f46a10e149e9e..f2b5063f9db6d55b9efb547c0c05d450463e0509 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/CumulantK15/CumulantK15Incomp_Device.cuh
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/CumulantK15/CumulantK15Incomp_Device.cuh
@@ -4,7 +4,7 @@
 #include <DataTypes.h>
 #include <curand.h>
 
-extern "C" __global__ void LB_Kernel_CumulantK15Incomp(real s9,
+__global__ void LB_Kernel_CumulantK15Incomp(real s9,
 	unsigned int* bcMatD,
 	unsigned int* neighborX,
 	unsigned int* neighborY,
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/MRT/MRTIncompSP27_Device.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/MRT/MRTIncompSP27_Device.cu
index 700cd974b1c739dd5cb3722917453707e465972f..16b4c3841f397f62e52d6f3963b47864d29002d1 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/MRT/MRTIncompSP27_Device.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/MRT/MRTIncompSP27_Device.cu
@@ -6,7 +6,7 @@ using namespace vf::lbm::constant;
 using namespace vf::lbm::dir;
 #include "math.h"
 
-extern "C" __global__ void LB_Kernel_MRT_Incomp_SP_27(real omega,
+__global__ void LB_Kernel_MRT_Incomp_SP_27(real omega,
 	unsigned int* bcMatD,
 	unsigned int* neighborX,
 	unsigned int* neighborY,
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/MRT/MRTIncompSP27_Device.cuh b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/MRT/MRTIncompSP27_Device.cuh
index c9a353d6e08c4110a546416fd5185af68b837c65..d3a9fcea7c1a53e4084acf8dc5f1f815d0da967d 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/MRT/MRTIncompSP27_Device.cuh
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/MRT/MRTIncompSP27_Device.cuh
@@ -4,7 +4,7 @@
 #include <DataTypes.h>
 #include <curand.h>
 
-extern "C" __global__ void LB_Kernel_MRT_Incomp_SP_27(real omega,
+__global__ void LB_Kernel_MRT_Incomp_SP_27(real omega,
 	unsigned int* bcMatD,
 	unsigned int* neighborX,
 	unsigned int* neighborY,
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/PorousMediaKernels/FluidFlow/Compressible/CumulantOne/PMCumulantOneCompSP27_Device.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/PorousMediaKernels/FluidFlow/Compressible/CumulantOne/PMCumulantOneCompSP27_Device.cu
index e961c35e37dc870affdfe4abec9766e41fc47fdc..2612455452b5931619e4d99f548f52b12de64721 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/PorousMediaKernels/FluidFlow/Compressible/CumulantOne/PMCumulantOneCompSP27_Device.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/PorousMediaKernels/FluidFlow/Compressible/CumulantOne/PMCumulantOneCompSP27_Device.cu
@@ -6,7 +6,7 @@ using namespace vf::lbm::constant;
 using namespace vf::lbm::dir;
 #include "math.h"
 
-extern "C" __global__ void LB_Kernel_PM_Cum_One_Comp_SP_27(real omega,
+__global__ void LB_Kernel_PM_Cum_One_Comp_SP_27(real omega,
 	unsigned int* neighborX,
 	unsigned int* neighborY,
 	unsigned int* neighborZ,
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/PorousMediaKernels/FluidFlow/Compressible/CumulantOne/PMCumulantOneCompSP27_Device.cuh b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/PorousMediaKernels/FluidFlow/Compressible/CumulantOne/PMCumulantOneCompSP27_Device.cuh
index e02e8abe1321d4b9b47872f9a5e7c5414a587531..6533c604f32a478cdc6a097e4dd7d0b56e48150d 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/PorousMediaKernels/FluidFlow/Compressible/CumulantOne/PMCumulantOneCompSP27_Device.cuh
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/PorousMediaKernels/FluidFlow/Compressible/CumulantOne/PMCumulantOneCompSP27_Device.cuh
@@ -4,7 +4,7 @@
 #include <DataTypes.h>
 #include <curand.h>
 
-extern "C" __global__ void LB_Kernel_PM_Cum_One_Comp_SP_27(real omega,
+__global__ void LB_Kernel_PM_Cum_One_Comp_SP_27(real omega,
 	unsigned int* neighborX,
 	unsigned int* neighborY,
 	unsigned int* neighborZ,
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/TurbulentViscosityKernels/FluidFlow/Compressible/CumulantK17chim/TurbulentViscosityCumulantK17CompChim_Device.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/TurbulentViscosityKernels/FluidFlow/Compressible/CumulantK17chim/TurbulentViscosityCumulantK17CompChim_Device.cu
index c7e5c26a8e3c4004fd166693f8bd965a29770ada..8bd376e555e5b15c5672f03cd371938665eb06df 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/TurbulentViscosityKernels/FluidFlow/Compressible/CumulantK17chim/TurbulentViscosityCumulantK17CompChim_Device.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/TurbulentViscosityKernels/FluidFlow/Compressible/CumulantK17chim/TurbulentViscosityCumulantK17CompChim_Device.cu
@@ -49,7 +49,7 @@ using namespace vf::lbm::dir;
 
 
 ////////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void LB_Kernel_TurbulentViscosityCumulantK17CompChim(
+__global__ void LB_Kernel_TurbulentViscosityCumulantK17CompChim(
 	real omega_in,
 	uint* typeOfGridNode,
 	uint* neighborX,
@@ -784,7 +784,7 @@ extern "C" __global__ void LB_Kernel_TurbulentViscosityCumulantK17CompChim(
 // #include "lbm/MacroscopicQuantities.h"
 
 // ////////////////////////////////////////////////////////////////////////////////
-// extern "C" __global__ void LB_Kernel_TurbulentViscosityCumulantK17CompChim(
+// __global__ void LB_Kernel_TurbulentViscosityCumulantK17CompChim(
 // 	real omega_in,
 // 	uint* typeOfGridNode,
 // 	uint* neighborX,
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/TurbulentViscosityKernels/FluidFlow/Compressible/CumulantK17chim/TurbulentViscosityCumulantK17CompChim_Device.cuh b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/TurbulentViscosityKernels/FluidFlow/Compressible/CumulantK17chim/TurbulentViscosityCumulantK17CompChim_Device.cuh
index 7f6738a9b6e39d63775a6490c1248f020fb4ccca..3633dfa4e18d057d60acf68faf348d3c5f5855d8 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/TurbulentViscosityKernels/FluidFlow/Compressible/CumulantK17chim/TurbulentViscosityCumulantK17CompChim_Device.cuh
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/TurbulentViscosityKernels/FluidFlow/Compressible/CumulantK17chim/TurbulentViscosityCumulantK17CompChim_Device.cuh
@@ -4,7 +4,7 @@
 #include <DataTypes.h>
 #include <curand.h>
 
-extern "C" __global__ void LB_Kernel_TurbulentViscosityCumulantK17CompChim(
+__global__ void LB_Kernel_TurbulentViscosityCumulantK17CompChim(
 	real omega_in,
 	uint* typeOfGridNode,
 	uint* neighborX,
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/WaleKernels/FluidFlow/Compressible/CumulantK15/WaleCumulantK15Comp_Device.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/WaleKernels/FluidFlow/Compressible/CumulantK15/WaleCumulantK15Comp_Device.cu
index fa3f22d021518553df0dee79079868e01f4a2427..eddc22e12c0bf619994bac432cf864f8a5fc60f7 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/WaleKernels/FluidFlow/Compressible/CumulantK15/WaleCumulantK15Comp_Device.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/WaleKernels/FluidFlow/Compressible/CumulantK15/WaleCumulantK15Comp_Device.cu
@@ -6,7 +6,7 @@ using namespace vf::lbm::constant;
 using namespace vf::lbm::dir;
 #include "math.h"
 
-extern "C" __global__ void LB_Kernel_WaleCumulantK15Comp(
+__global__ void LB_Kernel_WaleCumulantK15Comp(
 	real omega_in,
 	unsigned int* bcMatD,
 	unsigned int* neighborX,
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/WaleKernels/FluidFlow/Compressible/CumulantK15/WaleCumulantK15Comp_Device.cuh b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/WaleKernels/FluidFlow/Compressible/CumulantK15/WaleCumulantK15Comp_Device.cuh
index 409d7d0862479299e92c93aaa50085d673bb95f0..fad3eb11434b9c3fd216a7698b9275d4af43245c 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/WaleKernels/FluidFlow/Compressible/CumulantK15/WaleCumulantK15Comp_Device.cuh
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/WaleKernels/FluidFlow/Compressible/CumulantK15/WaleCumulantK15Comp_Device.cuh
@@ -4,7 +4,7 @@
 #include <DataTypes.h>
 #include <curand.h>
 
-extern "C" __global__ void LB_Kernel_WaleCumulantK15Comp(real omega,
+__global__ void LB_Kernel_WaleCumulantK15Comp(real omega,
 	unsigned int* bcMatD,
 	unsigned int* neighborX,
 	unsigned int* neighborY,
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/WaleKernels/FluidFlow/Compressible/CumulantK15BySoniMalav/WaleBySoniMalavCumulantK15Comp_Device.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/WaleKernels/FluidFlow/Compressible/CumulantK15BySoniMalav/WaleBySoniMalavCumulantK15Comp_Device.cu
index c705be07519225534d3532a2b752d2a29516b102..85578d9063a5d47ef121d8c553ffbaa6b8a82e80 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/WaleKernels/FluidFlow/Compressible/CumulantK15BySoniMalav/WaleBySoniMalavCumulantK15Comp_Device.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/WaleKernels/FluidFlow/Compressible/CumulantK15BySoniMalav/WaleBySoniMalavCumulantK15Comp_Device.cu
@@ -6,7 +6,7 @@ using namespace vf::lbm::constant;
 using namespace vf::lbm::dir;
 #include "math.h"
 
-extern "C" __global__ void LB_Kernel_WaleBySoniMalavCumulantK15Comp(
+__global__ void LB_Kernel_WaleBySoniMalavCumulantK15Comp(
 	real omega_in,
 	unsigned int* bcMatD,
 	unsigned int* neighborX,
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/WaleKernels/FluidFlow/Compressible/CumulantK15BySoniMalav/WaleBySoniMalavCumulantK15Comp_Device.cuh b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/WaleKernels/FluidFlow/Compressible/CumulantK15BySoniMalav/WaleBySoniMalavCumulantK15Comp_Device.cuh
index 593a7cef915e605f28bfbcbd362a6600b6c6224d..4297404073aacc0acd01b84c35cbae3d1081ed5b 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/WaleKernels/FluidFlow/Compressible/CumulantK15BySoniMalav/WaleBySoniMalavCumulantK15Comp_Device.cuh
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/WaleKernels/FluidFlow/Compressible/CumulantK15BySoniMalav/WaleBySoniMalavCumulantK15Comp_Device.cuh
@@ -4,7 +4,7 @@
 #include <DataTypes.h>
 #include <curand.h>
 
-extern "C" __global__ void LB_Kernel_WaleBySoniMalavCumulantK15Comp(real omega_in,
+__global__ void LB_Kernel_WaleBySoniMalavCumulantK15Comp(real omega_in,
 	unsigned int* bcMatD,
 	unsigned int* neighborX,
 	unsigned int* neighborY,
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/WaleKernels/FluidFlow/Compressible/CumulantK17/WaleCumulantK17Comp_Device.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/WaleKernels/FluidFlow/Compressible/CumulantK17/WaleCumulantK17Comp_Device.cu
index 380fc5258f32fb908a977f28a15688aa5d9b650d..46f74322c6c46abc2806da5abd2403c02c70566e 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/WaleKernels/FluidFlow/Compressible/CumulantK17/WaleCumulantK17Comp_Device.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/WaleKernels/FluidFlow/Compressible/CumulantK17/WaleCumulantK17Comp_Device.cu
@@ -6,7 +6,7 @@ using namespace vf::lbm::constant;
 using namespace vf::lbm::dir;
 #include "math.h"
 
-extern "C" __global__ void LB_Kernel_WaleCumulantK17Comp(
+__global__ void LB_Kernel_WaleCumulantK17Comp(
 	real omega_in,
 	unsigned int* bcMatD,
 	unsigned int* neighborX,
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/WaleKernels/FluidFlow/Compressible/CumulantK17/WaleCumulantK17Comp_Device.cuh b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/WaleKernels/FluidFlow/Compressible/CumulantK17/WaleCumulantK17Comp_Device.cuh
index 34e2c094cd9a2d7ecb1dc064db954d5b6e211f8f..459c833e2bd3f0bfd2a0c214a9d366bcfb3a4b49 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/WaleKernels/FluidFlow/Compressible/CumulantK17/WaleCumulantK17Comp_Device.cuh
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/WaleKernels/FluidFlow/Compressible/CumulantK17/WaleCumulantK17Comp_Device.cuh
@@ -4,7 +4,7 @@
 #include <DataTypes.h>
 #include <curand.h>
 
-extern "C" __global__ void LB_Kernel_WaleCumulantK17Comp(
+__global__ void LB_Kernel_WaleCumulantK17Comp(
 	real omega,
 	unsigned int* bcMatD,
 	unsigned int* neighborX,
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/WaleKernels/FluidFlow/Compressible/CumulantK17Debug/WaleCumulantK17DebugComp_Device.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/WaleKernels/FluidFlow/Compressible/CumulantK17Debug/WaleCumulantK17DebugComp_Device.cu
index c702c252be0ca9a51561d8aba68bc21e0f3b9902..0c0234fdde8ec76ae73119f9b911b1af7d2118e4 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/WaleKernels/FluidFlow/Compressible/CumulantK17Debug/WaleCumulantK17DebugComp_Device.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/WaleKernels/FluidFlow/Compressible/CumulantK17Debug/WaleCumulantK17DebugComp_Device.cu
@@ -6,7 +6,7 @@ using namespace vf::lbm::constant;
 using namespace vf::lbm::dir;
 #include "math.h"
 
-extern "C" __global__ void LB_Kernel_WaleCumulantK17DebugComp(
+__global__ void LB_Kernel_WaleCumulantK17DebugComp(
 	real omega_in,
 	unsigned int* bcMatD,
 	unsigned int* neighborX,
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/WaleKernels/FluidFlow/Compressible/CumulantK17Debug/WaleCumulantK17DebugComp_Device.cuh b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/WaleKernels/FluidFlow/Compressible/CumulantK17Debug/WaleCumulantK17DebugComp_Device.cuh
index 6f4253639f5d7bfce82f5909914c793955ef794a..50da48fa702862b71dc13b8b21b34cd1e2b39250 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/WaleKernels/FluidFlow/Compressible/CumulantK17Debug/WaleCumulantK17DebugComp_Device.cuh
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/WaleKernels/FluidFlow/Compressible/CumulantK17Debug/WaleCumulantK17DebugComp_Device.cuh
@@ -4,7 +4,7 @@
 #include <DataTypes.h>
 #include <curand.h>
 
-extern "C" __global__ void LB_Kernel_WaleCumulantK17DebugComp(
+__global__ void LB_Kernel_WaleCumulantK17DebugComp(
 	real omega_in,
 	unsigned int* bcMatD,
 	unsigned int* neighborX,
diff --git a/src/gpu/VirtualFluids_GPU/Particles/Particles.h b/src/gpu/VirtualFluids_GPU/Particles/Particles.h
index 4afff9e6969be80719a4bc146b5f41796f5ca33a..7a6d003a08ef7f6517b6259c2c1b895676c6d80b 100644
--- a/src/gpu/VirtualFluids_GPU/Particles/Particles.h
+++ b/src/gpu/VirtualFluids_GPU/Particles/Particles.h
@@ -7,12 +7,12 @@
 #include "Core/StringUtilities/StringUtil.h"
 #include "Parameter/Parameter.h"
 
-//extern "C" void calcDragLift(Parameter* para, int lev);
-extern "C" void allocParticles(Parameter* para, CudaMemoryManager* cudaMemoryManager);
-extern "C" void initParticles(Parameter* para);
-extern "C" void propagateParticles(Parameter* para, unsigned int t);
-extern "C" void copyAndPrintParticles(Parameter* para, CudaMemoryManager* cudaMemoryManager, unsigned int t, bool isInit);
+//void calcDragLift(Parameter* para, int lev);
+void allocParticles(Parameter* para, CudaMemoryManager* cudaMemoryManager);
+void initParticles(Parameter* para);
+void propagateParticles(Parameter* para, unsigned int t);
+void copyAndPrintParticles(Parameter* para, CudaMemoryManager* cudaMemoryManager, unsigned int t, bool isInit);
 
-extern "C" void rearrangeGeometry(Parameter* para, CudaMemoryManager* cudaMemoryManager);
+void rearrangeGeometry(Parameter* para, CudaMemoryManager* cudaMemoryManager);
 
 #endif
diff --git a/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitCompAD27/InitCompAD27_Device.cu b/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitCompAD27/InitCompAD27_Device.cu
index d4d4b97681d6596b4bc3752d74774f035256b9c4..c1902be602a52630ace8cbf124627d83a702121e 100644
--- a/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitCompAD27/InitCompAD27_Device.cu
+++ b/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitCompAD27/InitCompAD27_Device.cu
@@ -6,7 +6,7 @@ using namespace vf::lbm::constant;
 using namespace vf::lbm::dir;
 
 
-extern "C" __global__ void LB_Init_Comp_AD_27(unsigned int* neighborX,
+__global__ void LB_Init_Comp_AD_27(unsigned int* neighborX,
 	unsigned int* neighborY,
 	unsigned int* neighborZ,
 	unsigned int* geoD,
diff --git a/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitCompAD27/InitCompAD27_Device.cuh b/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitCompAD27/InitCompAD27_Device.cuh
index 1a21f872f7adbde555e6d45a233c93eb7ff3e055..80c46f94549371811025e7f751acda65c858c0f4 100644
--- a/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitCompAD27/InitCompAD27_Device.cuh
+++ b/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitCompAD27/InitCompAD27_Device.cuh
@@ -4,7 +4,7 @@
 #include <DataTypes.h>
 #include <curand.h>
 
-extern "C" __global__ void LB_Init_Comp_AD_27(unsigned int* neighborX,
+__global__ void LB_Init_Comp_AD_27(unsigned int* neighborX,
 	unsigned int* neighborY,
 	unsigned int* neighborZ,
 	unsigned int* geoD,
diff --git a/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitCompAD7/InitCompAD7_Device.cu b/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitCompAD7/InitCompAD7_Device.cu
index c3d7e4a22e4b36f4689967486f9b6c3243c766e7..38cd57fd48e02e410e1ae557088e023ffeadfc4e 100644
--- a/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitCompAD7/InitCompAD7_Device.cu
+++ b/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitCompAD7/InitCompAD7_Device.cu
@@ -6,7 +6,7 @@ using namespace vf::lbm::constant;
 using namespace vf::lbm::dir;
 #include "math.h"
 
-extern "C" __global__ void LB_Init_Comp_AD_7(unsigned int* neighborX,
+__global__ void LB_Init_Comp_AD_7(unsigned int* neighborX,
 	unsigned int* neighborY,
 	unsigned int* neighborZ,
 	unsigned int* geoD,
diff --git a/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitCompAD7/InitCompAD7_Device.cuh b/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitCompAD7/InitCompAD7_Device.cuh
index 4eb8b9eeb70876f39bc8c0b58a8789fb239530a0..7dc17056bca6116aa6e98367a78e92320813b502 100644
--- a/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitCompAD7/InitCompAD7_Device.cuh
+++ b/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitCompAD7/InitCompAD7_Device.cuh
@@ -4,7 +4,7 @@
 #include <DataTypes.h>
 #include <curand.h>
 
-extern "C" __global__ void LB_Init_Comp_AD_7(unsigned int* neighborX,
+__global__ void LB_Init_Comp_AD_7(unsigned int* neighborX,
 	unsigned int* neighborY,
 	unsigned int* neighborZ,
 	unsigned int* geoD,
diff --git a/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitCompSP27/InitCompSP27_Device.cu b/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitCompSP27/InitCompSP27_Device.cu
index 2a5b8898643ec43c10302e372a1931a165ca1e4e..d4ec203edc7d5b1f672745b0ac6afd8c37290fbf 100644
--- a/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitCompSP27/InitCompSP27_Device.cu
+++ b/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitCompSP27/InitCompSP27_Device.cu
@@ -8,7 +8,7 @@ using namespace vf::lbm::dir;
 
 #include <stdio.h>
 
-extern "C" __global__ void LB_Init_Comp_SP_27(unsigned int* neighborX,
+__global__ void LB_Init_Comp_SP_27(unsigned int* neighborX,
 	unsigned int* neighborY,
 	unsigned int* neighborZ,
 	unsigned int* geoD,
@@ -179,7 +179,7 @@ extern "C" __global__ void LB_Init_Comp_SP_27(unsigned int* neighborX,
 
 
 ////////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void LB_Init_Comp_Neq_SP_27( unsigned int* neighborX,
+__global__ void LB_Init_Comp_Neq_SP_27( unsigned int* neighborX,
                                                    unsigned int* neighborY,
                                                    unsigned int* neighborZ,
                                                    unsigned int* neighborWSB,
diff --git a/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitCompSP27/InitCompSP27_Device.cuh b/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitCompSP27/InitCompSP27_Device.cuh
index dd9dbd7d0b5d783bdbdcc05dd6c38fad0d60b79f..0e1d89b9ac4778a1fdb4d8e72cd13be5eb3920d1 100644
--- a/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitCompSP27/InitCompSP27_Device.cuh
+++ b/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitCompSP27/InitCompSP27_Device.cuh
@@ -4,7 +4,7 @@
 #include <DataTypes.h>
 #include <curand.h>
 
-extern "C" __global__ void LB_Init_Comp_SP_27(unsigned int* neighborX,
+__global__ void LB_Init_Comp_SP_27(unsigned int* neighborX,
 	unsigned int* neighborY,
 	unsigned int* neighborZ,
 	unsigned int* geoD,
@@ -16,7 +16,7 @@ extern "C" __global__ void LB_Init_Comp_SP_27(unsigned int* neighborX,
 	real* DD,
 	bool EvenOrOdd);
 
-extern "C" __global__ void LB_Init_Comp_Neq_SP_27(unsigned int* neighborX,
+__global__ void LB_Init_Comp_Neq_SP_27(unsigned int* neighborX,
 	unsigned int* neighborY,
 	unsigned int* neighborZ,
 	unsigned int* neighborWSB,
diff --git a/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitF3/InitF3_Device.cu b/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitF3/InitF3_Device.cu
index 522c433d4184aaae9dc458bdb70f3e5491cad2d0..2b66a69d97388054925a442965ef85416f50d912 100644
--- a/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitF3/InitF3_Device.cu
+++ b/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitF3/InitF3_Device.cu
@@ -7,7 +7,7 @@ using namespace vf::lbm::dir;
 #include "math.h"
 
 
-extern "C" __global__ void LB_Init_F3(unsigned int* neighborX,
+__global__ void LB_Init_F3(unsigned int* neighborX,
 	unsigned int* neighborY,
 	unsigned int* neighborZ,
 	unsigned int* geoD,
diff --git a/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitF3/InitF3_Device.cuh b/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitF3/InitF3_Device.cuh
index 0df2f099bfede16d19edf171016e1a20e1efe0b7..71750c0fde2722a900800f742d801b6caf2febbf 100644
--- a/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitF3/InitF3_Device.cuh
+++ b/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitF3/InitF3_Device.cuh
@@ -4,7 +4,7 @@
 #include <DataTypes.h>
 #include <curand.h>
 
-extern "C" __global__ void LB_Init_F3(unsigned int* neighborX,
+__global__ void LB_Init_F3(unsigned int* neighborX,
 	unsigned int* neighborY,
 	unsigned int* neighborZ,
 	unsigned int* geoD,
diff --git a/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitIncompAD27/InitIncompAD27_Device.cu b/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitIncompAD27/InitIncompAD27_Device.cu
index 2ab2ec33fcf0131b1fc463327f1d927bb0c5819e..c541e28bac1b9b6861116e0422d6724e936ac4c0 100644
--- a/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitIncompAD27/InitIncompAD27_Device.cu
+++ b/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitIncompAD27/InitIncompAD27_Device.cu
@@ -6,7 +6,7 @@ using namespace vf::lbm::constant;
 using namespace vf::lbm::dir;
 #include "math.h"
 
-extern "C" __global__ void LB_Init_Incomp_AD_27(unsigned int* neighborX,
+__global__ void LB_Init_Incomp_AD_27(unsigned int* neighborX,
 	unsigned int* neighborY,
 	unsigned int* neighborZ,
 	unsigned int* geoD,
diff --git a/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitIncompAD27/InitIncompAD27_Device.cuh b/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitIncompAD27/InitIncompAD27_Device.cuh
index a67b7a9f446fc49983d0a9f2e309dfaec3649547..85b8f1b7ae9f814b1a15aa2a8af4aa2d7c3645b5 100644
--- a/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitIncompAD27/InitIncompAD27_Device.cuh
+++ b/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitIncompAD27/InitIncompAD27_Device.cuh
@@ -4,7 +4,7 @@
 #include <DataTypes.h>
 #include <curand.h>
 
-extern "C" __global__ void LB_Init_Incomp_AD_27(unsigned int* neighborX,
+__global__ void LB_Init_Incomp_AD_27(unsigned int* neighborX,
 	unsigned int* neighborY,
 	unsigned int* neighborZ,
 	unsigned int* geoD,
diff --git a/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitIncompAD7/InitIncompAD7_Device.cu b/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitIncompAD7/InitIncompAD7_Device.cu
index 2b0e85f38443889fcdbe64bb2cef6a02a7f654a9..94a4352d43dee67117f66eaf03536c5ea3e15edd 100644
--- a/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitIncompAD7/InitIncompAD7_Device.cu
+++ b/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitIncompAD7/InitIncompAD7_Device.cu
@@ -6,7 +6,7 @@ using namespace vf::lbm::constant;
 using namespace vf::lbm::dir;
 #include "math.h"
 
-extern "C" __global__ void LB_Init_Incomp_AD_7(unsigned int* neighborX,
+__global__ void LB_Init_Incomp_AD_7(unsigned int* neighborX,
 	unsigned int* neighborY,
 	unsigned int* neighborZ,
 	unsigned int* geoD,
diff --git a/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitIncompAD7/InitIncompAD7_Device.cuh b/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitIncompAD7/InitIncompAD7_Device.cuh
index 9dedbf270e6e53283d41d315d67135f9c57940db..2d381e6e0531b900cee90eb8f7e2f3d4ff8615f7 100644
--- a/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitIncompAD7/InitIncompAD7_Device.cuh
+++ b/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitIncompAD7/InitIncompAD7_Device.cuh
@@ -4,7 +4,7 @@
 #include <DataTypes.h>
 #include <curand.h>
 
-extern "C" __global__ void LB_Init_Incomp_AD_7(unsigned int* neighborX,
+__global__ void LB_Init_Incomp_AD_7(unsigned int* neighborX,
 	unsigned int* neighborY,
 	unsigned int* neighborZ,
 	unsigned int* geoD,
diff --git a/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitSP27/InitSP27_Device.cu b/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitSP27/InitSP27_Device.cu
index 27dd33b14086730b157d1ffba495d9ecc5b7b221..cd5362a51c002660eb51df6b7ec376099b135c98 100644
--- a/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitSP27/InitSP27_Device.cu
+++ b/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitSP27/InitSP27_Device.cu
@@ -6,7 +6,7 @@ using namespace vf::lbm::constant;
 using namespace vf::lbm::dir;
 #include "math.h"
 
-extern "C" __global__ void LB_Init_SP_27(unsigned int* neighborX,
+__global__ void LB_Init_SP_27(unsigned int* neighborX,
 	unsigned int* neighborY,
 	unsigned int* neighborZ,
 	unsigned int* geoD,
diff --git a/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitSP27/InitSP27_Device.cuh b/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitSP27/InitSP27_Device.cuh
index 038b10f7bd85786fe3b3ef4c4d70d11816543def..2cb9c74506862e4c4766b2416dfc7f1a0024e6b0 100644
--- a/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitSP27/InitSP27_Device.cuh
+++ b/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitSP27/InitSP27_Device.cuh
@@ -4,7 +4,7 @@
 #include <DataTypes.h>
 #include <curand.h>
 
-extern "C" __global__ void LB_Init_SP_27(unsigned int* neighborX,
+__global__ void LB_Init_SP_27(unsigned int* neighborX,
 	unsigned int* neighborY,
 	unsigned int* neighborZ,
 	unsigned int* geoD,
diff --git a/src/gpu/VirtualFluids_GPU/Temperature/FindQTemp.h b/src/gpu/VirtualFluids_GPU/Temperature/FindQTemp.h
index 9c1b2d86da5823c552e14c686430cdf30e21f85d..8704431e95d86ddfc4df8f14ed189592250283be 100644
--- a/src/gpu/VirtualFluids_GPU/Temperature/FindQTemp.h
+++ b/src/gpu/VirtualFluids_GPU/Temperature/FindQTemp.h
@@ -6,17 +6,17 @@
 #include "Parameter/Parameter.h"
 
 
-extern "C" void findTempPress(Parameter* para);
+void findTempPress(Parameter* para);
 
-extern "C" void findKforTempPress(Parameter* para);
+void findKforTempPress(Parameter* para);
 
-extern "C" void findTempVel(Parameter* para);
+void findTempVel(Parameter* para);
 
-extern "C" void findKforTempVel(Parameter* para);
+void findKforTempVel(Parameter* para);
 
-extern "C" void findTemp(Parameter* para);
+void findTemp(Parameter* para);
 
-extern "C" void findKforTemp(Parameter* para);
+void findKforTemp(Parameter* para);
 
 
 #endif