From 0868f9da3c5dc9055f0de0743a65b2be2f721d60 Mon Sep 17 00:00:00 2001
From: "LEGOLAS\\lenz" <lenz@irmb.tu-bs.de>
Date: Tue, 5 May 2020 15:29:53 +0200
Subject: [PATCH] changes compiler flags for SEPARABLE COMPILATION, which brings
 a substantial performance boost

---
 CMakeLists.txt                                | 12 +--
 CMakeMacros/Cuda/Link.cmake                   |  2 +-
 MachineFiles/login01                          |  2 +-
 MachineFiles/login02                          |  2 +-
 .../FlowStateData/ThermalDependencies.cuh     |  4 -
 targets/apps/GKS/Flame7cm/Flame7cm.cpp        | 70 +++++++-------
 targets/apps/GKS/MultiGPU_nD/MultiGPU_nD.cpp  | 36 +++----
 .../GKS/SandiaFlame_1m/SandiaFlame_1m.cpp     | 96 +++++++++----------
 targets/apps/GKS/SingleGPU/SingleGPU.cpp      | 37 +++----
 targets/libs/GridGenerator/CMakeLists.txt     |  2 +
 10 files changed, 131 insertions(+), 132 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index d03261516..ac5d8b33a 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -107,7 +107,7 @@ ENDIF(MSVC)
 
 IF( VF.BUILD_VF_GKS )
   # only use this with device of CC larger than 6.0
-  set(CMAKE_CUDA_FLAGS " -arch=sm_60" CACHE STRING "" FORCE)
+  set(CMAKE_CUDA_FLAGS " -arch=sm_60 -Xptxas=\"-v\"" CACHE STRING "" FORCE)
 ENDIF()
 
 set(CMAKE_CUDA_FLAGS_DEBUG " -G" CACHE STRING "" FORCE)
@@ -171,7 +171,7 @@ IF (VF.BUILD_VF_GKS)
 
     #add_subdirectory(targets/apps/GKS/ThermalCavityMultiGPU)
     #add_subdirectory(targets/apps/GKS/DrivenCavityMultiGPU)
-    add_subdirectory(targets/apps/GKS/RayleighBenardMultiGPU)
+    #add_subdirectory(targets/apps/GKS/RayleighBenardMultiGPU)
 
     #add_subdirectory(targets/apps/GKS/SalinasVazquez)
     #add_subdirectory(targets/apps/GKS/BoundaryJet)
@@ -187,13 +187,13 @@ IF (VF.BUILD_VF_GKS)
     #add_subdirectory(targets/apps/GKS/ConcreteHeatFluxBCTest)
     
     #add_subdirectory(targets/apps/GKS/PoolFire)
-    #add_subdirectory(targets/apps/GKS/Flame7cm)
-    #add_subdirectory(targets/apps/GKS/SandiaFlame_1m)
+    add_subdirectory(targets/apps/GKS/Flame7cm)
+    add_subdirectory(targets/apps/GKS/SandiaFlame_1m)
     #add_subdirectory(targets/apps/GKS/Candle)
     
     #add_subdirectory(targets/apps/GKS/MultiGPU)
-    #add_subdirectory(targets/apps/GKS/MultiGPU_nD)
-    #add_subdirectory(targets/apps/GKS/SingleGPU)
+    add_subdirectory(targets/apps/GKS/MultiGPU_nD)
+    add_subdirectory(targets/apps/GKS/SingleGPU)
 ELSE()
   MESSAGE( STATUS "exclude Virtual Fluids GKS." )
 ENDIF()
diff --git a/CMakeMacros/Cuda/Link.cmake b/CMakeMacros/Cuda/Link.cmake
index db34e95d8..c1a4ddcc1 100644
--- a/CMakeMacros/Cuda/Link.cmake
+++ b/CMakeMacros/Cuda/Link.cmake
@@ -11,7 +11,7 @@ macro(linkCuda)
     
     INCLUDE_DIRECTORIES( ${CUDA_INCLUDE_DIRS} ${CUDA_CUT_INCLUDE_DIR})
 
-	set_property(TARGET ${targetName} PROPERTY CUDA_SEPARABLE_COMPILATION ON)
+	#set_property(TARGET ${targetName} PROPERTY CUDA_SEPARABLE_COMPILATION ON)
 	#set_property(TARGET ${targetName} PROPERTY CUDA_64_BIT_DEVICE_CODE ON)
 	
     #set(CUDA_NVCC_FLAGS "-G" CACHE TYPE INTERNAL FORCE)
diff --git a/MachineFiles/login01 b/MachineFiles/login01
index 0e884953c..29458c846 100644
--- a/MachineFiles/login01
+++ b/MachineFiles/login01
@@ -10,7 +10,7 @@ SET(BOOST_LIBRARYDIR  "/cluster/lib/boost/1.63.0/gcc/lib"  CACHE PATH "BOOST_LIB
 
 #SET(VTK_DIR "/cluster/lib/vtk/8.1.0/lib/cmake/vtk-8.1" CACHE PATH "VTK directory override" FORCE)
 #SET(VTK_DIR "/home/irmb/tools/VTK/build/VTK-8.2.0" CACHE PATH "VTK directory override" FORCE)
-SET(VTK_DIR "/home/y0054018/software/vtk/VTK-8.1.0/build" CACHE PATH "VTK directory override" FORCE)
+SET(VTK_DIR "/home/stelenz/software/vtk/VTK-8.1.0/build" CACHE PATH "VTK directory override" FORCE)
 
 SET(CUDA_CUT_INCLUDE_DIR "/cluster/cuda/9.0/include;/cluster/cuda/9.0/samples/common/inc" CACHE PATH "CUDA_CUT_INCLUDE_DIR")
 
diff --git a/MachineFiles/login02 b/MachineFiles/login02
index 0e884953c..29458c846 100644
--- a/MachineFiles/login02
+++ b/MachineFiles/login02
@@ -10,7 +10,7 @@ SET(BOOST_LIBRARYDIR  "/cluster/lib/boost/1.63.0/gcc/lib"  CACHE PATH "BOOST_LIB
 
 #SET(VTK_DIR "/cluster/lib/vtk/8.1.0/lib/cmake/vtk-8.1" CACHE PATH "VTK directory override" FORCE)
 #SET(VTK_DIR "/home/irmb/tools/VTK/build/VTK-8.2.0" CACHE PATH "VTK directory override" FORCE)
-SET(VTK_DIR "/home/y0054018/software/vtk/VTK-8.1.0/build" CACHE PATH "VTK directory override" FORCE)
+SET(VTK_DIR "/home/stelenz/software/vtk/VTK-8.1.0/build" CACHE PATH "VTK directory override" FORCE)
 
 SET(CUDA_CUT_INCLUDE_DIR "/cluster/cuda/9.0/include;/cluster/cuda/9.0/samples/common/inc" CACHE PATH "CUDA_CUT_INCLUDE_DIR")
 
diff --git a/src/GksGpu/FlowStateData/ThermalDependencies.cuh b/src/GksGpu/FlowStateData/ThermalDependencies.cuh
index 045d0ffb2..9f3a268a5 100644
--- a/src/GksGpu/FlowStateData/ThermalDependencies.cuh
+++ b/src/GksGpu/FlowStateData/ThermalDependencies.cuh
@@ -20,8 +20,6 @@
 
 namespace GksGpu {
 
-#ifdef USE_PASSIVE_SCALAR
-
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -56,8 +54,6 @@ __host__ __device__ inline void setLambdaFromT( PrimitiveVariables& prim, real T
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 
-#endif // USE_PASSIVE_SCALAR
-
 } // namespace GksGpu
 
 
diff --git a/targets/apps/GKS/Flame7cm/Flame7cm.cpp b/targets/apps/GKS/Flame7cm/Flame7cm.cpp
index 0858e6bc1..a9dd32034 100644
--- a/targets/apps/GKS/Flame7cm/Flame7cm.cpp
+++ b/targets/apps/GKS/Flame7cm/Flame7cm.cpp
@@ -85,9 +85,9 @@ void thermalCavity( std::string path, std::string simulationName, uint _gpuIndex
     real U = 0.0314;
     real rhoFuel = 0.68;
 
-    PrimitiveVariables prim( rho, 0.0, 0.0, 0.0, -1.0 );
+    GksGpu::PrimitiveVariables prim( rho, 0.0, 0.0, 0.0, -1.0 );
 
-    setLambdaFromT( prim, 3.0 );
+    GksGpu::setLambdaFromT( prim, 3.0 );
 
     real cs  = sqrt( ( ( K + 5.0 ) / ( K + 3.0 ) ) / ( 2.0 * prim.lambda ) );
 
@@ -111,7 +111,7 @@ void thermalCavity( std::string path, std::string simulationName, uint _gpuIndex
 
     //////////////////////////////////////////////////////////////////////////
 
-    Parameters parameters;
+    GksGpu::Parameters parameters;
 
     parameters.K  = K;
     parameters.Pr = Pr;
@@ -132,8 +132,8 @@ void thermalCavity( std::string path, std::string simulationName, uint _gpuIndex
 
     parameters.heatOfReaction = dh;
 
-    parameters.viscosityModel = ViscosityModel::sutherlandsLaw;
-    //parameters.viscosityModel = ViscosityModel::constant;
+    parameters.viscosityModel = GksGpu::ViscosityModel::sutherlandsLaw;
+    //parameters.viscosityModel = GksGpu::ViscosityModel::constant;
 
     parameters.enableReaction = true;
 
@@ -210,9 +210,9 @@ void thermalCavity( std::string path, std::string simulationName, uint _gpuIndex
 
     ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 
-    CudaUtility::setCudaDevice(_gpuIndex);
+    GksGpu::CudaUtility::setCudaDevice(_gpuIndex);
 
-    auto dataBase = std::make_shared<DataBase>( "GPU" );
+    auto dataBase = std::make_shared<GksGpu::DataBase>( "GPU" );
 
     ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
     ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -221,11 +221,11 @@ void thermalCavity( std::string path, std::string simulationName, uint _gpuIndex
     
     real openBoundaryVelocityLimiter = 1.0;
 
-    SPtr<BoundaryCondition> bcMX = std::make_shared<Open>( dataBase, prim, openBoundaryVelocityLimiter );
-    SPtr<BoundaryCondition> bcPX = std::make_shared<Open>( dataBase, prim, openBoundaryVelocityLimiter );
+    SPtr<GksGpu::BoundaryCondition> bcMX = std::make_shared<GksGpu::Open>( dataBase, prim, openBoundaryVelocityLimiter );
+    SPtr<GksGpu::BoundaryCondition> bcPX = std::make_shared<GksGpu::Open>( dataBase, prim, openBoundaryVelocityLimiter );
 
-    SPtr<BoundaryCondition> bcMX_2 = std::make_shared<Symmetry>( dataBase, 'x' );
-    SPtr<BoundaryCondition> bcPX_2 = std::make_shared<Symmetry>( dataBase, 'x' );
+    SPtr<GksGpu::BoundaryCondition> bcMX_2 = std::make_shared<GksGpu::Symmetry>( dataBase, 'x' );
+    SPtr<GksGpu::BoundaryCondition> bcPX_2 = std::make_shared<GksGpu::Symmetry>( dataBase, 'x' );
 
     bcMX->findBoundaryCells( meshAdapter, false, [&](Vec3 center){ return center.x < -0.5*L; } );
     bcPX->findBoundaryCells( meshAdapter, false, [&](Vec3 center){ return center.x >  0.5*L; } );
@@ -235,19 +235,19 @@ void thermalCavity( std::string path, std::string simulationName, uint _gpuIndex
 
     //////////////////////////////////////////////////////////////////////////
     
-    SPtr<BoundaryCondition> bcMY;
-    SPtr<BoundaryCondition> bcPY;
+    SPtr<GksGpu::BoundaryCondition> bcMY;
+    SPtr<GksGpu::BoundaryCondition> bcPY;
 
-    SPtr<BoundaryCondition> bcMY_2;
-    SPtr<BoundaryCondition> bcPY_2;
+    SPtr<GksGpu::BoundaryCondition> bcMY_2;
+    SPtr<GksGpu::BoundaryCondition> bcPY_2;
 
     if( threeDimensional )
     {
-        bcMY = std::make_shared<Open>( dataBase, prim, openBoundaryVelocityLimiter );
-        bcPY = std::make_shared<Open>( dataBase, prim, openBoundaryVelocityLimiter );
+        bcMY = std::make_shared<GksGpu::Open>( dataBase, prim, openBoundaryVelocityLimiter );
+        bcPY = std::make_shared<GksGpu::Open>( dataBase, prim, openBoundaryVelocityLimiter );
 
-        bcMY_2 = std::make_shared<Symmetry>( dataBase, 'y' );
-        bcPY_2 = std::make_shared<Symmetry>( dataBase, 'y' );
+        bcMY_2 = std::make_shared<GksGpu::Symmetry>( dataBase, 'y' );
+        bcPY_2 = std::make_shared<GksGpu::Symmetry>( dataBase, 'y' );
 
         bcMY->findBoundaryCells( meshAdapter, false, [&](Vec3 center){ return center.y < -0.5*L; } );
         bcPY->findBoundaryCells( meshAdapter, false, [&](Vec3 center){ return center.y >  0.5*L; } );
@@ -257,8 +257,8 @@ void thermalCavity( std::string path, std::string simulationName, uint _gpuIndex
     }
     else
     {
-        bcMY = std::make_shared<Periodic>(dataBase);
-        bcPY = std::make_shared<Periodic>(dataBase);
+        bcMY = std::make_shared<GksGpu::Periodic>(dataBase);
+        bcPY = std::make_shared<GksGpu::Periodic>(dataBase);
 
         bcMY->findBoundaryCells(meshAdapter, false, [&](Vec3 center) { return center.y < -0.5*dx; });
         bcPY->findBoundaryCells(meshAdapter, false, [&](Vec3 center) { return center.y >  0.5*dx; });
@@ -266,19 +266,19 @@ void thermalCavity( std::string path, std::string simulationName, uint _gpuIndex
 
     //////////////////////////////////////////////////////////////////////////
     
-    SPtr<BoundaryCondition> bcMZ = std::make_shared<AdiabaticWall>( dataBase, Vec3(0, 0, 0), true );
+    SPtr<GksGpu::BoundaryCondition> bcMZ = std::make_shared<GksGpu::AdiabaticWall>( dataBase, Vec3(0, 0, 0), true );
     //SPtr<BoundaryCondition> bcMZ = std::make_shared<IsothermalWall>( dataBase, Vec3(0, 0, 0), prim.lambda, true );
     //SPtr<BoundaryCondition> bcMZ = std::make_shared<InflowComplete>( dataBase, PrimitiveVariables(rho, 0.0, 0.0, 0.0, prim.lambda, 0.0, 0.0) );
     //SPtr<BoundaryCondition> bcMZ = std::make_shared<Open>( dataBase );
 
-    SPtr<BoundaryCondition> bcPZ = std::make_shared<Pressure2>( dataBase, c1o2 * prim.rho / prim.lambda );
+    SPtr<GksGpu::BoundaryCondition> bcPZ = std::make_shared<GksGpu::Pressure2>( dataBase, c1o2 * prim.rho / prim.lambda );
     
     bcMZ->findBoundaryCells( meshAdapter, true, [&](Vec3 center){ return center.z < 0.0; } );
     bcPZ->findBoundaryCells( meshAdapter, true, [&](Vec3 center){ return center.z > H  ; } );
 
     //////////////////////////////////////////////////////////////////////////
 
-    SPtr<BoundaryCondition> burner = std::make_shared<CreepingMassFlux>( dataBase, rhoFuel, U, prim.lambda );
+    SPtr<GksGpu::BoundaryCondition> burner = std::make_shared<GksGpu::CreepingMassFlux>( dataBase, rhoFuel, U, prim.lambda );
 
     burner->findBoundaryCells( meshAdapter, false, [&](Vec3 center){ 
         
@@ -318,22 +318,22 @@ void thermalCavity( std::string path, std::string simulationName, uint _gpuIndex
 
     dataBase->setMesh( meshAdapter );
 
-    CudaUtility::printCudaMemoryUsage();
+    GksGpu::CudaUtility::printCudaMemoryUsage();
     
     if( restartIter == INVALID_INDEX )
     {
-        Initializer::interpret(dataBase, [&](Vec3 cellCenter) -> ConservedVariables {
+        GksGpu::Initializer::interpret(dataBase, [&](Vec3 cellCenter) -> GksGpu::ConservedVariables {
 
-            PrimitiveVariables primLocal = prim;
+            GksGpu::PrimitiveVariables primLocal = prim;
 
-            return toConservedVariables(primLocal, parameters.K);
+            return GksGpu::toConservedVariables(primLocal, parameters.K);
         });
 
         writeVtkXML( dataBase, parameters, 0, path + simulationName + "_0" );
     }
     else
     {
-        Restart::readRestart( dataBase, path + simulationName + "_" + std::to_string( restartIter ), startIter );
+        GksGpu::Restart::readRestart( dataBase, path + simulationName + "_" + std::to_string( restartIter ), startIter );
 
         writeVtkXML( dataBase, parameters, 0, path + simulationName + "_" + std::to_string( restartIter ) + "_restart" );
     }
@@ -344,7 +344,7 @@ void thermalCavity( std::string path, std::string simulationName, uint _gpuIndex
         for( uint level = 0; level < dataBase->numberOfLevels; level++ )
             bc->runBoundaryConditionKernel( dataBase, parameters, level );
 
-    Initializer::initializeDataUpdate(dataBase);
+    GksGpu::Initializer::initializeDataUpdate(dataBase);
 
     dataBase->copyDataDeviceToHost();
 
@@ -359,11 +359,11 @@ void thermalCavity( std::string path, std::string simulationName, uint _gpuIndex
 
     //////////////////////////////////////////////////////////////////////////
 
-    CupsAnalyzer cupsAnalyzer( dataBase, true, 30.0, true, 10000 );
+    GksGpu::CupsAnalyzer cupsAnalyzer( dataBase, true, 30.0, true, 10000 );
 
-    ConvergenceAnalyzer convergenceAnalyzer( dataBase, 10000 );
+    GksGpu::ConvergenceAnalyzer convergenceAnalyzer( dataBase, 10000 );
 
-    auto turbulenceAnalyzer = std::make_shared<TurbulenceAnalyzer>( dataBase, 10 * iterPerSecond );
+    auto turbulenceAnalyzer = std::make_shared<GksGpu::TurbulenceAnalyzer>( dataBase, 10 * iterPerSecond );
 
     turbulenceAnalyzer->collect_UU = true;
     turbulenceAnalyzer->collect_VV = true;
@@ -381,7 +381,7 @@ void thermalCavity( std::string path, std::string simulationName, uint _gpuIndex
 
         convergenceAnalyzer.run( iter );
 
-        TimeStepping::nestedTimeStep(dataBase, parameters, 0);
+        GksGpu::TimeStepping::nestedTimeStep(dataBase, parameters, 0);
 
         int crashCellIndex = dataBase->getCrashCellIndex();
 
@@ -406,7 +406,7 @@ void thermalCavity( std::string path, std::string simulationName, uint _gpuIndex
         if( iter % 10000 == 0 /*|| iter == 39000*/)
         {
             dataBase->copyDataDeviceToHost();
-            Restart::writeRestart( dataBase, path + simulationName + "_" + std::to_string( iter ), iter );
+            GksGpu::Restart::writeRestart( dataBase, path + simulationName + "_" + std::to_string( iter ), iter );
         }
 
         if( iter % 100000 == 0 )
diff --git a/targets/apps/GKS/MultiGPU_nD/MultiGPU_nD.cpp b/targets/apps/GKS/MultiGPU_nD/MultiGPU_nD.cpp
index 809ee79de..017df5343 100644
--- a/targets/apps/GKS/MultiGPU_nD/MultiGPU_nD.cpp
+++ b/targets/apps/GKS/MultiGPU_nD/MultiGPU_nD.cpp
@@ -146,7 +146,7 @@ void performanceTest( std::string path, std::string simulationName, uint decompo
 
     //////////////////////////////////////////////////////////////////////////
 
-    Parameters parameters;
+    GksGpu::Parameters parameters;
 
     parameters.K  = 0;
     parameters.Pr = 1;
@@ -228,11 +228,11 @@ void performanceTest( std::string path, std::string simulationName, uint decompo
 
     ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 
-    auto dataBase = std::make_shared<DataBase>("GPU");
+    auto dataBase = std::make_shared<GksGpu::DataBase>("GPU");
 
     ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 
-    for ( int i = 0; i < rank % CudaUtility::getCudaDeviceCount(); i++ ) MPI_Barrier(MPI_COMM_WORLD);
+    for ( int i = 0; i < rank % GksGpu::CudaUtility::getCudaDeviceCount(); i++ ) MPI_Barrier(MPI_COMM_WORLD);
 
     {
         GksMeshAdapter meshAdapter(gridBuilder);
@@ -248,24 +248,24 @@ void performanceTest( std::string path, std::string simulationName, uint decompo
         ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
         ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 
-        SPtr<BoundaryCondition> bcMX = std::make_shared<Periodic>(dataBase);
-        SPtr<BoundaryCondition> bcPX = std::make_shared<Periodic>(dataBase);
+        SPtr<GksGpu::BoundaryCondition> bcMX = std::make_shared<GksGpu::Periodic>(dataBase);
+        SPtr<GksGpu::BoundaryCondition> bcPX = std::make_shared<GksGpu::Periodic>(dataBase);
 
         if (sideLengthX == 1) bcMX->findBoundaryCells(meshAdapter, true, [&](Vec3 center) { return center.x < -0.5*L; });
         if (sideLengthX == 1) bcPX->findBoundaryCells(meshAdapter, true, [&](Vec3 center) { return center.x > 0.5*L; });
 
         //////////////////////////////////////////////////////////////////////////
 
-        SPtr<BoundaryCondition> bcMY = std::make_shared<Periodic>(dataBase);
-        SPtr<BoundaryCondition> bcPY = std::make_shared<Periodic>(dataBase);
+        SPtr<GksGpu::BoundaryCondition> bcMY = std::make_shared<GksGpu::Periodic>(dataBase);
+        SPtr<GksGpu::BoundaryCondition> bcPY = std::make_shared<GksGpu::Periodic>(dataBase);
 
         if (sideLengthY == 1) bcMY->findBoundaryCells(meshAdapter, true, [&](Vec3 center) { return center.y < -0.5*L; });
         if (sideLengthY == 1) bcPY->findBoundaryCells(meshAdapter, true, [&](Vec3 center) { return center.y > 0.5*L; });
 
         //////////////////////////////////////////////////////////////////////////
 
-        SPtr<BoundaryCondition> bcMZ = std::make_shared<Periodic>(dataBase);
-        SPtr<BoundaryCondition> bcPZ = std::make_shared<Periodic>(dataBase);
+        SPtr<GksGpu::BoundaryCondition> bcMZ = std::make_shared<GksGpu::Periodic>(dataBase);
+        SPtr<GksGpu::BoundaryCondition> bcPZ = std::make_shared<GksGpu::Periodic>(dataBase);
 
         if (sideLengthZ == 1) bcMZ->findBoundaryCells(meshAdapter, true, [&](Vec3 center) { return center.z < -0.5*L; });
         if (sideLengthZ == 1) bcPZ->findBoundaryCells(meshAdapter, true, [&](Vec3 center) { return center.z > 0.5*L; });
@@ -303,14 +303,14 @@ void performanceTest( std::string path, std::string simulationName, uint decompo
 
         dataBase->setCommunicators(meshAdapter);
 
-        CudaUtility::printCudaMemoryUsage();
+        GksGpu::CudaUtility::printCudaMemoryUsage();
     }
 
-    for ( int i = 0; i < CudaUtility::getCudaDeviceCount() - rank % CudaUtility::getCudaDeviceCount(); i++ ) MPI_Barrier(MPI_COMM_WORLD);
+    for ( int i = 0; i < GksGpu::CudaUtility::getCudaDeviceCount() - rank % GksGpu::CudaUtility::getCudaDeviceCount(); i++ ) MPI_Barrier(MPI_COMM_WORLD);
 
     ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 
-    Initializer::interpret(dataBase, [&] ( Vec3 cellCenter ) -> ConservedVariables
+    GksGpu::Initializer::interpret(dataBase, [&] ( Vec3 cellCenter ) -> GksGpu::ConservedVariables
     {
         real U = 0.1;
 
@@ -332,7 +332,7 @@ void performanceTest( std::string path, std::string simulationName, uint decompo
 
         //rhoLocal = rank + 1;
 
-        return toConservedVariables( PrimitiveVariables( rhoLocal, ULocal, VLocal, WLocal, parameters.lambdaRef ), parameters.K );
+        return GksGpu::toConservedVariables( GksGpu::PrimitiveVariables( rhoLocal, ULocal, VLocal, WLocal, parameters.lambdaRef ), parameters.K );
     });
 
     ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -343,7 +343,7 @@ void performanceTest( std::string path, std::string simulationName, uint decompo
         for( uint level = 0; level < dataBase->numberOfLevels; level++ )
             bc->runBoundaryConditionKernel( dataBase, parameters, level );
 
-    Initializer::initializeDataUpdate(dataBase);
+    GksGpu::Initializer::initializeDataUpdate(dataBase);
 
     dataBase->copyDataDeviceToHost();
 
@@ -358,7 +358,7 @@ void performanceTest( std::string path, std::string simulationName, uint decompo
 
     const uint numberOfIterations = 1000;
 
-    CupsAnalyzer cupsAnalyzer( dataBase, false, 30.0, true, numberOfIterations );
+    GksGpu::CupsAnalyzer cupsAnalyzer( dataBase, false, 30.0, true, numberOfIterations );
 
     MPI_Barrier(MPI_COMM_WORLD);
 
@@ -366,7 +366,7 @@ void performanceTest( std::string path, std::string simulationName, uint decompo
 
     for( uint iter = 1; iter <= numberOfIterations; iter++ )
     {
-        TimeStepping::nestedTimeStep(dataBase, parameters, 0);
+        GksGpu::TimeStepping::nestedTimeStep(dataBase, parameters, 0);
 
         cupsAnalyzer.run( iter, parameters.dt );
     }
@@ -452,7 +452,7 @@ int main( int argc, char* argv[])
     //////////////////////////////////////////////////////////////////////////
 
     // Important: for Cuda-Aware MPI the device must be set before MPI_Init()
-    int deviceCount = CudaUtility::getCudaDeviceCount();
+    int deviceCount = GksGpu::CudaUtility::getCudaDeviceCount();
 
     if(deviceCount == 0)
     {
@@ -461,7 +461,7 @@ int main( int argc, char* argv[])
         *logging::out << logging::Logger::WARNING << msg.str(); msg.str("");
     }
 
-    CudaUtility::setCudaDevice( rank % deviceCount );
+    GksGpu::CudaUtility::setCudaDevice( rank % deviceCount );
 
     //////////////////////////////////////////////////////////////////////////
 
diff --git a/targets/apps/GKS/SandiaFlame_1m/SandiaFlame_1m.cpp b/targets/apps/GKS/SandiaFlame_1m/SandiaFlame_1m.cpp
index a57824fa8..53725b488 100644
--- a/targets/apps/GKS/SandiaFlame_1m/SandiaFlame_1m.cpp
+++ b/targets/apps/GKS/SandiaFlame_1m/SandiaFlame_1m.cpp
@@ -55,7 +55,7 @@
 #include "GksGpu/Analyzer/CupsAnalyzer.h"
 #include "GksGpu/Analyzer/ConvergenceAnalyzer.h"
 #include "GksGpu/Analyzer/TurbulenceAnalyzer.h"
-#include "GksGpu/Analyzer/PointTimeseriesAnalyzer.h"
+#include "GksGpu/Analyzer/PointTimeSeriesAnalyzer.h"
 
 #include "GksGpu/Restart/Restart.h"
 
@@ -92,9 +92,9 @@ void thermalCavity( std::string path, std::string simulationName, uint _gpuIndex
     if( _testIndex == 24 ) { U = 0.097; rhoFuel = 0.5464; }    // Test 24      medium flow rate
     if( _testIndex == 17 ) { U = 0.117; rhoFuel = 0.5641; }    // Test 17      high flow rate
 
-    PrimitiveVariables prim( rho, 0.0, 0.0, 0.0, -1.0 );
+    GksGpu::PrimitiveVariables prim( rho, 0.0, 0.0, 0.0, -1.0 );
 
-    setLambdaFromT( prim, 2.85 );
+    GksGpu::setLambdaFromT( prim, 2.85 );
 
     real cs  = sqrt( ( ( K + 5.0 ) / ( K + 3.0 ) ) / ( 2.0 * prim.lambda ) );
 
@@ -117,7 +117,7 @@ void thermalCavity( std::string path, std::string simulationName, uint _gpuIndex
 
     //////////////////////////////////////////////////////////////////////////
 
-    Parameters parameters;
+    GksGpu::Parameters parameters;
 
     parameters.K  = K;
     parameters.Pr = Pr;
@@ -138,8 +138,8 @@ void thermalCavity( std::string path, std::string simulationName, uint _gpuIndex
 
     parameters.heatOfReaction = dh;
 
-    parameters.viscosityModel = ViscosityModel::sutherlandsLaw;
-    //parameters.viscosityModel = ViscosityModel::constant;
+    parameters.viscosityModel = GksGpu::ViscosityModel::sutherlandsLaw;
+    //parameters.viscosityModel = GksGpu::ViscosityModel::constant;
 
     parameters.enableReaction = true;
 
@@ -217,9 +217,9 @@ void thermalCavity( std::string path, std::string simulationName, uint _gpuIndex
 
     ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 
-    CudaUtility::setCudaDevice(_gpuIndex);
+    GksGpu::CudaUtility::setCudaDevice(_gpuIndex);
 
-    auto dataBase = std::make_shared<DataBase>( "GPU" );
+    auto dataBase = std::make_shared<GksGpu::DataBase>( "GPU" );
 
     ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
     ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -228,11 +228,11 @@ void thermalCavity( std::string path, std::string simulationName, uint _gpuIndex
     
     real openBoundaryVelocityLimiter = 1.0;
 
-    SPtr<BoundaryCondition> bcMX = std::make_shared<Open>( dataBase, prim, openBoundaryVelocityLimiter );
-    SPtr<BoundaryCondition> bcPX = std::make_shared<Open>( dataBase, prim, openBoundaryVelocityLimiter );
+    SPtr<GksGpu::BoundaryCondition> bcMX = std::make_shared<GksGpu::Open>( dataBase, prim, openBoundaryVelocityLimiter );
+    SPtr<GksGpu::BoundaryCondition> bcPX = std::make_shared<GksGpu::Open>( dataBase, prim, openBoundaryVelocityLimiter );
 
-    SPtr<BoundaryCondition> bcMX_2 = std::make_shared<Symmetry>( dataBase, 'x' );
-    SPtr<BoundaryCondition> bcPX_2 = std::make_shared<Symmetry>( dataBase, 'x' );
+    SPtr<GksGpu::BoundaryCondition> bcMX_2 = std::make_shared<GksGpu::Symmetry>( dataBase, 'x' );
+    SPtr<GksGpu::BoundaryCondition> bcPX_2 = std::make_shared<GksGpu::Symmetry>( dataBase, 'x' );
 
     bcMX->findBoundaryCells( meshAdapter, false, [&](Vec3 center){ return center.x < -0.5*L; } );
     bcPX->findBoundaryCells( meshAdapter, false, [&](Vec3 center){ return center.x >  0.5*L; } );
@@ -242,19 +242,19 @@ void thermalCavity( std::string path, std::string simulationName, uint _gpuIndex
 
     //////////////////////////////////////////////////////////////////////////
     
-    SPtr<BoundaryCondition> bcMY;
-    SPtr<BoundaryCondition> bcPY;
+    SPtr<GksGpu::BoundaryCondition> bcMY;
+    SPtr<GksGpu::BoundaryCondition> bcPY;
 
-    SPtr<BoundaryCondition> bcMY_2;
-    SPtr<BoundaryCondition> bcPY_2;
+    SPtr<GksGpu::BoundaryCondition> bcMY_2;
+    SPtr<GksGpu::BoundaryCondition> bcPY_2;
 
     if( threeDimensional )
     {
-        bcMY = std::make_shared<Open>( dataBase, prim, openBoundaryVelocityLimiter );
-        bcPY = std::make_shared<Open>( dataBase, prim, openBoundaryVelocityLimiter );
+        bcMY = std::make_shared<GksGpu::Open>( dataBase, prim, openBoundaryVelocityLimiter );
+        bcPY = std::make_shared<GksGpu::Open>( dataBase, prim, openBoundaryVelocityLimiter );
 
-        bcMY_2 = std::make_shared<Symmetry>( dataBase, 'y' );
-        bcPY_2 = std::make_shared<Symmetry>( dataBase, 'y' );
+        bcMY_2 = std::make_shared<GksGpu::Symmetry>( dataBase, 'y' );
+        bcPY_2 = std::make_shared<GksGpu::Symmetry>( dataBase, 'y' );
 
         bcMY->findBoundaryCells( meshAdapter, false, [&](Vec3 center){ return center.y < -0.5*L; } );
         bcPY->findBoundaryCells( meshAdapter, false, [&](Vec3 center){ return center.y >  0.5*L; } );
@@ -264,8 +264,8 @@ void thermalCavity( std::string path, std::string simulationName, uint _gpuIndex
     }
     else
     {
-        bcMY = std::make_shared<Periodic>(dataBase);
-        bcPY = std::make_shared<Periodic>(dataBase);
+        bcMY = std::make_shared<GksGpu::Periodic>(dataBase);
+        bcPY = std::make_shared<GksGpu::Periodic>(dataBase);
 
         bcMY->findBoundaryCells(meshAdapter, false, [&](Vec3 center) { return center.y < -0.5*dx; });
         bcPY->findBoundaryCells(meshAdapter, false, [&](Vec3 center) { return center.y >  0.5*dx; });
@@ -273,19 +273,19 @@ void thermalCavity( std::string path, std::string simulationName, uint _gpuIndex
 
     //////////////////////////////////////////////////////////////////////////
     
-    SPtr<BoundaryCondition> bcMZ = std::make_shared<AdiabaticWall>( dataBase, Vec3(0, 0, 0), true );
+    SPtr<GksGpu::BoundaryCondition> bcMZ = std::make_shared<GksGpu::AdiabaticWall>( dataBase, Vec3(0, 0, 0), true );
     //SPtr<BoundaryCondition> bcMZ = std::make_shared<IsothermalWall>( dataBase, Vec3(0, 0, 0), prim.lambda, true );
     //SPtr<BoundaryCondition> bcMZ = std::make_shared<InflowComplete>( dataBase, PrimitiveVariables(rho, 0.0, 0.0, 0.0, prim.lambda, 0.0, 0.0) );
     //SPtr<BoundaryCondition> bcMZ = std::make_shared<Open>( dataBase );
 
-    SPtr<BoundaryCondition> bcPZ = std::make_shared<Pressure2>( dataBase, c1o2 * prim.rho / prim.lambda );
+    SPtr<GksGpu::BoundaryCondition> bcPZ = std::make_shared<GksGpu::Pressure2>( dataBase, c1o2 * prim.rho / prim.lambda );
     
     bcMZ->findBoundaryCells( meshAdapter, true, [&](Vec3 center){ return center.z < 0.0; } );
     bcPZ->findBoundaryCells( meshAdapter, true, [&](Vec3 center){ return center.z > H  ; } );
 
     //////////////////////////////////////////////////////////////////////////
 
-    SPtr<BoundaryCondition> burner = std::make_shared<CreepingMassFlux>( dataBase, rhoFuel, U, prim.lambda );
+    SPtr<GksGpu::BoundaryCondition> burner = std::make_shared<GksGpu::CreepingMassFlux>( dataBase, rhoFuel, U, prim.lambda );
     //SPtr<BoundaryCondition> burner = std::make_shared<Inflow>( dataBase, Vec3(0,0,U), prim.lambda, rhoFuel, 1, 0, 0, 1.0 );
 
     burner->findBoundaryCells( meshAdapter, false, [&](Vec3 center){ 
@@ -321,21 +321,21 @@ void thermalCavity( std::string path, std::string simulationName, uint _gpuIndex
 
     //////////////////////////////////////////////////////////////////////////
 
-    auto pointTimeSeriesAnalyzerU_P1 = std::make_shared<PointTimeSeriesAnalyzer>( dataBase, meshAdapter, Vec3(0.0, 0.0, 0.505), 'U' );
-    auto pointTimeSeriesAnalyzerV_P1 = std::make_shared<PointTimeSeriesAnalyzer>( dataBase, meshAdapter, Vec3(0.0, 0.0, 0.505), 'V' );
-    auto pointTimeSeriesAnalyzerW_P1 = std::make_shared<PointTimeSeriesAnalyzer>( dataBase, meshAdapter, Vec3(0.0, 0.0, 0.505), 'W' );
+    auto pointTimeSeriesAnalyzerU_P1 = std::make_shared<GksGpu::PointTimeSeriesAnalyzer>( dataBase, meshAdapter, Vec3(0.0, 0.0, 0.505), 'U' );
+    auto pointTimeSeriesAnalyzerV_P1 = std::make_shared<GksGpu::PointTimeSeriesAnalyzer>( dataBase, meshAdapter, Vec3(0.0, 0.0, 0.505), 'V' );
+    auto pointTimeSeriesAnalyzerW_P1 = std::make_shared<GksGpu::PointTimeSeriesAnalyzer>( dataBase, meshAdapter, Vec3(0.0, 0.0, 0.505), 'W' );
 
-    auto pointTimeSeriesAnalyzerU_P2 = std::make_shared<PointTimeSeriesAnalyzer>( dataBase, meshAdapter, Vec3(0.5, 0.0, 0.505), 'U' );
-    auto pointTimeSeriesAnalyzerV_P2 = std::make_shared<PointTimeSeriesAnalyzer>( dataBase, meshAdapter, Vec3(0.5, 0.0, 0.505), 'V' );
-    auto pointTimeSeriesAnalyzerW_P2 = std::make_shared<PointTimeSeriesAnalyzer>( dataBase, meshAdapter, Vec3(0.5, 0.0, 0.505), 'W' );
+    auto pointTimeSeriesAnalyzerU_P2 = std::make_shared<GksGpu::PointTimeSeriesAnalyzer>( dataBase, meshAdapter, Vec3(0.5, 0.0, 0.505), 'U' );
+    auto pointTimeSeriesAnalyzerV_P2 = std::make_shared<GksGpu::PointTimeSeriesAnalyzer>( dataBase, meshAdapter, Vec3(0.5, 0.0, 0.505), 'V' );
+    auto pointTimeSeriesAnalyzerW_P2 = std::make_shared<GksGpu::PointTimeSeriesAnalyzer>( dataBase, meshAdapter, Vec3(0.5, 0.0, 0.505), 'W' );
 
-    auto pointTimeSeriesAnalyzerU_P3 = std::make_shared<PointTimeSeriesAnalyzer>( dataBase, meshAdapter, Vec3(0.0, 0.0, 2.0), 'U' );
-    auto pointTimeSeriesAnalyzerV_P3 = std::make_shared<PointTimeSeriesAnalyzer>( dataBase, meshAdapter, Vec3(0.0, 0.0, 2.0), 'V' );
-    auto pointTimeSeriesAnalyzerW_P3 = std::make_shared<PointTimeSeriesAnalyzer>( dataBase, meshAdapter, Vec3(0.0, 0.0, 2.0), 'W' );
+    auto pointTimeSeriesAnalyzerU_P3 = std::make_shared<GksGpu::PointTimeSeriesAnalyzer>( dataBase, meshAdapter, Vec3(0.0, 0.0, 2.0), 'U' );
+    auto pointTimeSeriesAnalyzerV_P3 = std::make_shared<GksGpu::PointTimeSeriesAnalyzer>( dataBase, meshAdapter, Vec3(0.0, 0.0, 2.0), 'V' );
+    auto pointTimeSeriesAnalyzerW_P3 = std::make_shared<GksGpu::PointTimeSeriesAnalyzer>( dataBase, meshAdapter, Vec3(0.0, 0.0, 2.0), 'W' );
 
-    auto pointTimeSeriesAnalyzerU_P4 = std::make_shared<PointTimeSeriesAnalyzer>( dataBase, meshAdapter, Vec3(0.5, 0.0, 2.0), 'U' );
-    auto pointTimeSeriesAnalyzerV_P4 = std::make_shared<PointTimeSeriesAnalyzer>( dataBase, meshAdapter, Vec3(0.5, 0.0, 2.0), 'V' );
-    auto pointTimeSeriesAnalyzerW_P4 = std::make_shared<PointTimeSeriesAnalyzer>( dataBase, meshAdapter, Vec3(0.5, 0.0, 2.0), 'W' );
+    auto pointTimeSeriesAnalyzerU_P4 = std::make_shared<GksGpu::PointTimeSeriesAnalyzer>( dataBase, meshAdapter, Vec3(0.5, 0.0, 2.0), 'U' );
+    auto pointTimeSeriesAnalyzerV_P4 = std::make_shared<GksGpu::PointTimeSeriesAnalyzer>( dataBase, meshAdapter, Vec3(0.5, 0.0, 2.0), 'V' );
+    auto pointTimeSeriesAnalyzerW_P4 = std::make_shared<GksGpu::PointTimeSeriesAnalyzer>( dataBase, meshAdapter, Vec3(0.5, 0.0, 2.0), 'W' );
 
     ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
     ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -346,22 +346,22 @@ void thermalCavity( std::string path, std::string simulationName, uint _gpuIndex
 
     dataBase->setMesh( meshAdapter );
 
-    CudaUtility::printCudaMemoryUsage();
+    GksGpu::CudaUtility::printCudaMemoryUsage();
     
     if( restartIter == INVALID_INDEX )
     {
-        Initializer::interpret(dataBase, [&](Vec3 cellCenter) -> ConservedVariables {
+        GksGpu::Initializer::interpret(dataBase, [&](Vec3 cellCenter) -> GksGpu::ConservedVariables {
 
-            PrimitiveVariables primLocal = prim;
+            GksGpu::PrimitiveVariables primLocal = prim;
 
-            return toConservedVariables(primLocal, parameters.K);
+            return GksGpu::toConservedVariables(primLocal, parameters.K);
         });
 
         writeVtkXML( dataBase, parameters, 0, path + simulationName + "_0" );
     }
     else
     {
-        Restart::readRestart( dataBase, path + simulationName + "_" + std::to_string( restartIter ), startIter );
+        GksGpu::Restart::readRestart( dataBase, path + simulationName + "_" + std::to_string( restartIter ), startIter );
 
         writeVtkXML( dataBase, parameters, 0, path + simulationName + "_" + std::to_string( restartIter ) + "_restart" );
     }
@@ -372,7 +372,7 @@ void thermalCavity( std::string path, std::string simulationName, uint _gpuIndex
         for( uint level = 0; level < dataBase->numberOfLevels; level++ )
             bc->runBoundaryConditionKernel( dataBase, parameters, level );
 
-    Initializer::initializeDataUpdate(dataBase);
+    GksGpu::Initializer::initializeDataUpdate(dataBase);
 
     dataBase->copyDataDeviceToHost();
 
@@ -387,11 +387,11 @@ void thermalCavity( std::string path, std::string simulationName, uint _gpuIndex
 
     //////////////////////////////////////////////////////////////////////////
 
-    CupsAnalyzer cupsAnalyzer( dataBase, true, 30.0, true, 10000 );
+    GksGpu::CupsAnalyzer cupsAnalyzer( dataBase, true, 30.0, true, 10000 );
 
-    ConvergenceAnalyzer convergenceAnalyzer( dataBase, 10000 );
+    GksGpu::ConvergenceAnalyzer convergenceAnalyzer( dataBase, 10000 );
 
-    auto turbulenceAnalyzer = std::make_shared<TurbulenceAnalyzer>( dataBase, 10 * iterPerSecond );
+    auto turbulenceAnalyzer = std::make_shared<GksGpu::TurbulenceAnalyzer>( dataBase, 10 * iterPerSecond );
 
     turbulenceAnalyzer->collect_UU = true;
     turbulenceAnalyzer->collect_VV = true;
@@ -409,7 +409,7 @@ void thermalCavity( std::string path, std::string simulationName, uint _gpuIndex
 
         convergenceAnalyzer.run( iter );
 
-        TimeStepping::nestedTimeStep(dataBase, parameters, 0);
+        GksGpu::TimeStepping::nestedTimeStep(dataBase, parameters, 0);
 
         pointTimeSeriesAnalyzerU_P1->run(iter, parameters);
         pointTimeSeriesAnalyzerV_P1->run(iter, parameters);
@@ -450,7 +450,7 @@ void thermalCavity( std::string path, std::string simulationName, uint _gpuIndex
         if( iter % 10000 == 0 /*|| iter == 39000*/)
         {
             dataBase->copyDataDeviceToHost();
-            Restart::writeRestart( dataBase, path + simulationName + "_" + std::to_string( iter ), iter );
+            GksGpu::Restart::writeRestart( dataBase, path + simulationName + "_" + std::to_string( iter ), iter );
         }
 
         if( iter % 100000 == 0 )
diff --git a/targets/apps/GKS/SingleGPU/SingleGPU.cpp b/targets/apps/GKS/SingleGPU/SingleGPU.cpp
index 78f5c3264..843fe3fc7 100644
--- a/targets/apps/GKS/SingleGPU/SingleGPU.cpp
+++ b/targets/apps/GKS/SingleGPU/SingleGPU.cpp
@@ -72,7 +72,7 @@ real performanceTest( std::string path, std::string simulationName, uint nx )
 
     //////////////////////////////////////////////////////////////////////////
 
-    Parameters parameters;
+    GksGpu::Parameters parameters;
 
     parameters.K  = 0;
     parameters.Pr = 1;
@@ -121,31 +121,31 @@ real performanceTest( std::string path, std::string simulationName, uint nx )
 
     ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 
-    auto dataBase = std::make_shared<DataBase>( "CPU" );
+    auto dataBase = std::make_shared<GksGpu::DataBase>( "GPU" );
 
     ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
     ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
     ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
     ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 
-    SPtr<BoundaryCondition> bcMX = std::make_shared<Periodic>( dataBase );
-    SPtr<BoundaryCondition> bcPX = std::make_shared<Periodic>( dataBase );
+    SPtr<GksGpu::BoundaryCondition> bcMX = std::make_shared<GksGpu::Periodic>( dataBase );
+    SPtr<GksGpu::BoundaryCondition> bcPX = std::make_shared<GksGpu::Periodic>( dataBase );
 
     bcMX->findBoundaryCells( meshAdapter, true, [&](Vec3 center){ return center.x < -0.5*L; } );
     bcPX->findBoundaryCells( meshAdapter, true, [&](Vec3 center){ return center.x >  0.5*L; } );
 
     //////////////////////////////////////////////////////////////////////////
 
-    SPtr<BoundaryCondition> bcMY = std::make_shared<Periodic>( dataBase );
-    SPtr<BoundaryCondition> bcPY = std::make_shared<Periodic>( dataBase );
+    SPtr<GksGpu::BoundaryCondition> bcMY = std::make_shared<GksGpu::Periodic>( dataBase );
+    SPtr<GksGpu::BoundaryCondition> bcPY = std::make_shared<GksGpu::Periodic>( dataBase );
 
     bcMY->findBoundaryCells( meshAdapter, true, [&](Vec3 center){ return center.y < -0.5*L; } );
     bcPY->findBoundaryCells( meshAdapter, true, [&](Vec3 center){ return center.y >  0.5*L; } );
 
     //////////////////////////////////////////////////////////////////////////
     
-    SPtr<BoundaryCondition> bcMZ = std::make_shared<Periodic>( dataBase );
-    SPtr<BoundaryCondition> bcPZ = std::make_shared<Periodic>( dataBase );
+    SPtr<GksGpu::BoundaryCondition> bcMZ = std::make_shared<GksGpu::Periodic>( dataBase );
+    SPtr<GksGpu::BoundaryCondition> bcPZ = std::make_shared<GksGpu::Periodic>( dataBase );
     
     bcMZ->findBoundaryCells( meshAdapter, true, [&](Vec3 center){ return center.z < -0.5*L; } );
     bcPZ->findBoundaryCells( meshAdapter, true, [&](Vec3 center){ return center.z >  0.5*L; } );
@@ -183,11 +183,11 @@ real performanceTest( std::string path, std::string simulationName, uint nx )
 
     dataBase->setCommunicators( meshAdapter );
 
-    CudaUtility::printCudaMemoryUsage();
+    GksGpu::CudaUtility::printCudaMemoryUsage();
 
     ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 
-    Initializer::interpret(dataBase, [&] ( Vec3 cellCenter ) -> ConservedVariables
+    GksGpu::Initializer::interpret(dataBase, [&] ( Vec3 cellCenter ) -> GksGpu::ConservedVariables
     {
         real U = 0.1;
 
@@ -203,7 +203,7 @@ real performanceTest( std::string path, std::string simulationName, uint nx )
 
         real rhoLocal = 2.0 * pLocal * parameters.lambdaRef;
 
-        return toConservedVariables( PrimitiveVariables( rhoLocal, ULocal, VLocal, WLocal, parameters.lambdaRef ), parameters.K );
+        return GksGpu::toConservedVariables( GksGpu::PrimitiveVariables( rhoLocal, ULocal, VLocal, WLocal, parameters.lambdaRef ), parameters.K );
     });
 
     ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -214,12 +214,12 @@ real performanceTest( std::string path, std::string simulationName, uint nx )
         for( uint level = 0; level < dataBase->numberOfLevels; level++ )
             bc->runBoundaryConditionKernel( dataBase, parameters, level );
 
-    Initializer::initializeDataUpdate(dataBase);
+    GksGpu::Initializer::initializeDataUpdate(dataBase);
 
     //dataBase->copyDataDeviceToHost();
 
     //writeVtkXML( dataBase, parameters, 0, path + simulationName + "_0" );
-    
+
     ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
     ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
     ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -227,7 +227,7 @@ real performanceTest( std::string path, std::string simulationName, uint nx )
 
     const uint numberOfIterations = 1000;
 
-    CupsAnalyzer cupsAnalyzer( dataBase, false, 30.0, true, numberOfIterations );
+    GksGpu::CupsAnalyzer cupsAnalyzer( dataBase, false, 30.0, true, numberOfIterations );
 
     real CUPS = 0;
 
@@ -235,7 +235,7 @@ real performanceTest( std::string path, std::string simulationName, uint nx )
 
     for( uint iter = 1; iter <= numberOfIterations; iter++ )
     {
-        TimeStepping::nestedTimeStep(dataBase, parameters, 0);
+        GksGpu::TimeStepping::nestedTimeStep(dataBase, parameters, 0);
 
         CUPS = cupsAnalyzer.run( iter, parameters.dt );
     }
@@ -244,7 +244,7 @@ real performanceTest( std::string path, std::string simulationName, uint nx )
 
-    //dataBase->copyDataDeviceToHost();
+    dataBase->copyDataDeviceToHost(); // bring the results back from the device before they are written out below
 
-    //writeVtkXML( dataBase, parameters, 0, path + simulationName + "_final" );
+    writeVtkXML( dataBase, parameters, 0, path + simulationName + "_final" );
     
     //////////////////////////////////////////////////////////////////////////
 
@@ -284,7 +284,8 @@ int main( int argc, char* argv[])
         std::ofstream file;
         file.open( path + simulationName + ".dat" );
 
-        std::vector<uint> nxList = {32,64,128,256};
+        //std::vector<uint> nxList = {32,64,128,256};
+        std::vector<uint> nxList = {256};
 
         for( auto nx : nxList )
         {
@@ -293,7 +294,7 @@ int main( int argc, char* argv[])
             std::ofstream logFile( path + simulationName + "_nx_" + std::to_string(nx) + ".log" );
             logging::Logger::addStream(&logFile);
 
-            CudaUtility::setCudaDevice( 0 );
+            GksGpu::CudaUtility::setCudaDevice( 0 );
     
             //////////////////////////////////////////////////////////////////////////
 
diff --git a/targets/libs/GridGenerator/CMakeLists.txt b/targets/libs/GridGenerator/CMakeLists.txt
index 1ab0b7572..460d60d03 100644
--- a/targets/libs/GridGenerator/CMakeLists.txt
+++ b/targets/libs/GridGenerator/CMakeLists.txt
@@ -14,5 +14,7 @@ include(CMakePackage.cmake)
 buildLib(${targetName} "${MY_SRCS}" "${linkDirectories}" "${libsToLink}" "${includeDirectories}")
 groupTarget(${targetName} ${libraryFolder})
 
+set_target_properties(${targetName} PROPERTIES CUDA_SEPARABLE_COMPILATION ON) # compile this target's CUDA device code separately
+
 # Specify the linking to 3rdParty libs
 include(3rdPartyLinking.cmake)
-- 
GitLab