diff --git a/CMakeLists.txt b/CMakeLists.txt
index 7f3cf04ab14f09e84fea43d4f454b445ab970d68..1909f964b6093deaff937f1b65f72a17ca07d642 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -182,6 +182,7 @@ IF (HULC.BUILD_VF_GKS)
     
     add_subdirectory(targets/apps/GKS/MultiGPU)
     add_subdirectory(targets/apps/GKS/MultiGPU_nD)
+    add_subdirectory(targets/apps/GKS/SingleGPU)
 ELSE()
   MESSAGE( STATUS "exclude Virtual Fluids GKS." )
 ENDIF()
diff --git a/src/GksGpu/Analyzer/CupsAnalyzer.cpp b/src/GksGpu/Analyzer/CupsAnalyzer.cpp
index 22614c81199c5a5bf64fbab96f653d4c45a1d634..86dc316a28549861b7b90ef7ac78bb71932025ec 100644
--- a/src/GksGpu/Analyzer/CupsAnalyzer.cpp
+++ b/src/GksGpu/Analyzer/CupsAnalyzer.cpp
@@ -44,18 +44,20 @@ void CupsAnalyzer::restart()
     this->timerRestart->start();
 }
 
-void CupsAnalyzer::run( uint iter, real dt )
+real CupsAnalyzer::run( uint iter, real dt )
 {
     real currentRuntime             = this->timer->getCurrentRuntimeInSeconds();
     real currentRuntimeSinceRestart = this->timerRestart->getCurrentRuntimeInSeconds();
 
+    real CUPS = -1.0;
+
     this->counter++;
 
     if( checkOutputPerTime(currentRuntime) || checkOutputPerIter(iter) )
     {
         unsigned long long numberOfCellUpdates = this->numberOfCellUpdatesPerTimeStep * (unsigned long long)counter;
 
-        real CUPS = real(numberOfCellUpdates) / currentRuntimeSinceRestart;
+        CUPS = real(numberOfCellUpdates) / currentRuntimeSinceRestart;
 
         this->printCups( iter, iter * dt, currentRuntime, CUPS );
 
@@ -66,6 +68,8 @@ void CupsAnalyzer::run( uint iter, real dt )
     {
         outputPerTimeCounter++;
     }
+
+    return CUPS;
 }
 
 bool CupsAnalyzer::checkOutputPerTime(real currentRuntime)
diff --git a/src/GksGpu/Analyzer/CupsAnalyzer.h b/src/GksGpu/Analyzer/CupsAnalyzer.h
index 3fe5ae3f12ca7497911f1f30743a5de5804dadfa..49651a221e9836a45000b6a1dbe88df77c440923 100644
--- a/src/GksGpu/Analyzer/CupsAnalyzer.h
+++ b/src/GksGpu/Analyzer/CupsAnalyzer.h
@@ -40,7 +40,7 @@ public:
 
     void restart();
 
-    void run( uint iter, real dt );
+    real run( uint iter, real dt );
 
 private:
 
diff --git a/targets/apps/GKS/MultiGPU_nD/MultiGPU_nD.cpp b/targets/apps/GKS/MultiGPU_nD/MultiGPU_nD.cpp
index a9309a2cfd8613f754508c5a3816e1be37da0a27..812e9a33581efc27d6b283aa869a43adcfd1029e 100644
--- a/targets/apps/GKS/MultiGPU_nD/MultiGPU_nD.cpp
+++ b/targets/apps/GKS/MultiGPU_nD/MultiGPU_nD.cpp
@@ -350,7 +350,7 @@ void performanceTest( std::string path, std::string simulationName, uint decompo
 
     const uint numberOfIterations = 10000;
 
-    CupsAnalyzer cupsAnalyzer( dataBase, true, 30.0, true, numberOfIterations );
+    CupsAnalyzer cupsAnalyzer( dataBase, false, 30.0, true, numberOfIterations );
 
     MPI_Barrier(MPI_COMM_WORLD);
 
@@ -359,9 +359,9 @@ void performanceTest( std::string path, std::string simulationName, uint decompo
     for( uint iter = 1; iter <= numberOfIterations; iter++ )
     {
         TimeStepping::nestedTimeStep(dataBase, parameters, 0);
-    }
 
-    cupsAnalyzer.run( numberOfIterations, parameters.dt );
+        cupsAnalyzer.run( iter, parameters.dt );
+    }
 
     //////////////////////////////////////////////////////////////////////////
 
@@ -411,7 +411,7 @@ int main( int argc, char* argv[])
     //////////////////////////////////////////////////////////////////////////
 
     bool strongScaling = false;
-    uint nx = 64;
+    uint nx = 128;
     uint decompositionDimension = 3;
 
     if( argc > 1 ) nx = atoi( argv[1] );
diff --git a/targets/apps/GKS/SingleGPU/3rdPartyLinking.cmake b/targets/apps/GKS/SingleGPU/3rdPartyLinking.cmake
new file mode 100644
index 0000000000000000000000000000000000000000..72c7afc6076b832263506ab9ce777925cfcc6a66
--- /dev/null
+++ b/targets/apps/GKS/SingleGPU/3rdPartyLinking.cmake
@@ -0,0 +1,11 @@
+include (${CMAKE_SOURCE_DIR}/${cmakeMacroPath}/MPI/Link.cmake)
+linkMPI(${targetName})
+include (${CMAKE_SOURCE_DIR}/${cmakeMacroPath}/Cuda/Link.cmake)
+linkCuda(${targetName})
+#include (${CMAKE_SOURCE_DIR}/${cmakeMacroPath}/Metis/Link.cmake)
+#linkMetis(${targetName})
+
+#if(HULC.BUILD_JSONCPP)
+#  include (${CMAKE_SOURCE_DIR}/${cmakeMacroPath}/JsonCpp/Link.cmake)
+#  linkJsonCpp(${targetName})
+#endif()
diff --git a/targets/apps/GKS/SingleGPU/CMakeLists.txt b/targets/apps/GKS/SingleGPU/CMakeLists.txt
new file mode 100644
index 0000000000000000000000000000000000000000..d404310177a2f53760d1c84bce79d7d070fed409
--- /dev/null
+++ b/targets/apps/GKS/SingleGPU/CMakeLists.txt
@@ -0,0 +1,19 @@
+setTargetNameToFolderName(${CMAKE_CURRENT_LIST_DIR})
+
+set(linkDirectories "")
+set(libsToLink Core GridGenerator GksMeshAdapter GksVtkAdapter GksGpu)
+set(includeDirectories "${CMAKE_SOURCE_DIR}/src"
+                       "${CMAKE_SOURCE_DIR}/src/Core"
+                       "${CMAKE_SOURCE_DIR}/src/GridGenerator"
+                       "${CMAKE_SOURCE_DIR}/src/GksMeshAdapter"
+                       "${CMAKE_SOURCE_DIR}/src/GksVtkAdapter"
+                       "${CMAKE_SOURCE_DIR}/src/GksGpu")
+
+# Glob the source files of this folder into MY_SRCS (done by CMakePackage.cmake)
+include(CMakePackage.cmake)
+
+buildExe(${targetName} "${MY_SRCS}" "${linkDirectories}" "${libsToLink}" "${includeDirectories}")
+groupTarget(${targetName} ${gksAppFolder})
+
+# Link the third-party libraries (MPI and CUDA, see 3rdPartyLinking.cmake)
+include(3rdPartyLinking.cmake)
diff --git a/targets/apps/GKS/SingleGPU/CMakePackage.cmake b/targets/apps/GKS/SingleGPU/CMakePackage.cmake
new file mode 100644
index 0000000000000000000000000000000000000000..5d39e3804dbd180790629111449a7dc918292430
--- /dev/null
+++ b/targets/apps/GKS/SingleGPU/CMakePackage.cmake
@@ -0,0 +1,9 @@
+# File endings to collect: C/C++ file types plus CUDA sources (*.cu) and headers (*.cuh)
+resetFileEndingsToCollect()
+addCAndCPPFileTypes()
+addFileEndingToCollect("*.cu")
+addFileEndingToCollect("*.cuh")
+
+# Collect the source files of this folder; MY_SRCS is cleared first and presumably filled by includeRecursiveAllFilesFrom (confirm against the macro)
+unset(MY_SRCS)
+includeRecursiveAllFilesFrom(${targetName} ${CMAKE_CURRENT_LIST_DIR})
\ No newline at end of file
diff --git a/targets/apps/GKS/SingleGPU/SingleGPU.cpp b/targets/apps/GKS/SingleGPU/SingleGPU.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..10a07edbc52ef2ba1a30f2d5bdcb2c0fab57b131
--- /dev/null
+++ b/targets/apps/GKS/SingleGPU/SingleGPU.cpp
@@ -0,0 +1,333 @@
+//#define MPI_LOGGING
+
+#define _USE_MATH_DEFINES
+#include <math.h>
+#include <string>
+#include <iostream>
+#include <iomanip>
+#include <exception>
+#include <fstream>
+#include <sstream>
+#include <memory>
+
+#include "Core/Timer/Timer.h"
+#include "Core/PointerDefinitions.h"
+#include "Core/DataTypes.h"
+#include "Core/VectorTypes.h"
+#include "Core/Logger/Logger.h"
+
+#include "GridGenerator/geometries/Cuboid/Cuboid.h"
+#include "GridGenerator/geometries/Sphere/Sphere.h"
+#include "GridGenerator/geometries/VerticalCylinder/VerticalCylinder.h"
+#include "GridGenerator/geometries/Conglomerate/Conglomerate.h"
+
+#include "GridGenerator/grid/GridBuilder/LevelGridBuilder.h"
+#include "GridGenerator/grid/GridBuilder/MultipleGridBuilder.h"
+#include "GridGenerator/grid/GridFactory.h"
+#include "GridGenerator/geometries/BoundingBox/BoundingBox.h"
+#include "GridGenerator/utilities/communication.h"
+
+#include "GksMeshAdapter/GksMeshAdapter.h"
+
+#include "GksVtkAdapter/VTKInterface.h"
+
+#include "GksGpu/DataBase/DataBase.h"
+#include "GksGpu/Parameters/Parameters.h"
+#include "GksGpu/Initializer/Initializer.h"
+
+#include "GksGpu/FlowStateData/FlowStateData.cuh"
+#include "GksGpu/FlowStateData/FlowStateDataConversion.cuh"
+
+#include "GksGpu/BoundaryConditions/BoundaryCondition.h"
+#include "GksGpu/BoundaryConditions/IsothermalWall.h"
+#include "GksGpu/BoundaryConditions/Periodic.h"
+#include "GksGpu/BoundaryConditions/Pressure.h"
+#include "GksGpu/BoundaryConditions/AdiabaticWall.h"
+
+#include "GksGpu/Communication/Communicator.h"
+
+#include "GksGpu/TimeStepping/NestedTimeStep.h"
+
+#include "GksGpu/Analyzer/CupsAnalyzer.h"
+#include "GksGpu/Analyzer/ConvergenceAnalyzer.h"
+#include "GksGpu/Analyzer/TurbulenceAnalyzer.h"
+
+#include "GksGpu/CudaUtility/CudaUtility.h"
+#include "GksGpu/Communication/MpiUtility.h"
+
+//////////////////////////////////////////////////////////////////////////
+
+// Runs the single-GPU GKS performance benchmark on a fully periodic cube.
+//
+// A grid with nx cells per edge of a cube with edge length 1 is built, a sin/cos velocity
+// field is initialized, a fixed number of time steps is executed and VTK files are written
+// for the initial and the final state.
+//
+// path            prefix of all written VTK file names
+// simulationName  file name stem that is appended to path
+// nx              number of cells per edge of the cube
+// returns         last cell-updates-per-second value reported by the CupsAnalyzer, -1 if none
+real performanceTest( std::string path, std::string simulationName, uint nx )
+{
+    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+    real L  = 1.0;
+
+    real dx = L / real(nx);
+
+    //////////////////////////////////////////////////////////////////////////
+
+    Parameters parameters;
+
+    parameters.K  = 0;
+    parameters.Pr = 1;
+    parameters.mu = 0.01;
+
+    parameters.force.x = 0;
+    parameters.force.y = 0;
+    parameters.force.z = 0;
+
+    parameters.dt = 0.0001;
+    parameters.dx = dx;
+
+    parameters.lambdaRef = 1.0e-2;
+
+    parameters.forcingSchemeIdx = 0;
+
+    parameters.enableReaction = false;
+
+    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+    auto gridFactory = GridFactory::make();
+    gridFactory->setGridStrategy(Device::CPU);
+    gridFactory->setTriangularMeshDiscretizationMethod(TriangularMeshDiscretizationMethod::POINT_IN_OBJECT);
+
+    auto gridBuilder = MultipleGridBuilder::makeShared(gridFactory);
+
+    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+    gridBuilder->addCoarseGrid( - 0.5*L, - 0.5*L, - 0.5*L,
+                                  0.5*L,   0.5*L,   0.5*L, dx);
+
+    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+    gridBuilder->setPeriodicBoundaryCondition(true,true,true);
+
+    gridBuilder->buildGrids(GKS, false);
+
+    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+    GksMeshAdapter meshAdapter( gridBuilder );
+
+    meshAdapter.inputGrid();
+
+    meshAdapter.findPeriodicBoundaryNeighbors();
+
+    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+    auto dataBase = std::make_shared<DataBase>( "GPU" );
+
+    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+    SPtr<BoundaryCondition> bcMX = std::make_shared<Periodic>( dataBase );
+    SPtr<BoundaryCondition> bcPX = std::make_shared<Periodic>( dataBase );
+
+    bcMX->findBoundaryCells( meshAdapter, true, [&](Vec3 center){ return center.x < -0.5*L; } );
+    bcPX->findBoundaryCells( meshAdapter, true, [&](Vec3 center){ return center.x >  0.5*L; } );
+
+    //////////////////////////////////////////////////////////////////////////
+
+    SPtr<BoundaryCondition> bcMY = std::make_shared<Periodic>( dataBase );
+    SPtr<BoundaryCondition> bcPY = std::make_shared<Periodic>( dataBase );
+
+    bcMY->findBoundaryCells( meshAdapter, true, [&](Vec3 center){ return center.y < -0.5*L; } );
+    bcPY->findBoundaryCells( meshAdapter, true, [&](Vec3 center){ return center.y >  0.5*L; } );
+
+    //////////////////////////////////////////////////////////////////////////
+
+    SPtr<BoundaryCondition> bcMZ = std::make_shared<Periodic>( dataBase );
+    SPtr<BoundaryCondition> bcPZ = std::make_shared<Periodic>( dataBase );
+
+    bcMZ->findBoundaryCells( meshAdapter, true, [&](Vec3 center){ return center.z < -0.5*L; } );
+    bcPZ->findBoundaryCells( meshAdapter, true, [&](Vec3 center){ return center.z >  0.5*L; } );
+
+    //////////////////////////////////////////////////////////////////////////
+
+    dataBase->boundaryConditions.push_back( bcMX );
+    dataBase->boundaryConditions.push_back( bcPX );
+
+    dataBase->boundaryConditions.push_back( bcMY );
+    dataBase->boundaryConditions.push_back( bcPY );
+
+    dataBase->boundaryConditions.push_back( bcMZ );
+    dataBase->boundaryConditions.push_back( bcPZ );
+
+    //////////////////////////////////////////////////////////////////////////
+
+    *logging::out << logging::Logger::INFO_HIGH << "NumberOfBoundaryConditions = " << (int)dataBase->boundaryConditions.size() << "\n";
+
+    *logging::out << logging::Logger::INFO_HIGH << "bcMX ==> " << bcMX->numberOfCellsPerLevel[0] << "\n";
+    *logging::out << logging::Logger::INFO_HIGH << "bcPX ==> " << bcPX->numberOfCellsPerLevel[0] << "\n";
+
+    *logging::out << logging::Logger::INFO_HIGH << "bcMY ==> " << bcMY->numberOfCellsPerLevel[0] << "\n";
+    *logging::out << logging::Logger::INFO_HIGH << "bcPY ==> " << bcPY->numberOfCellsPerLevel[0] << "\n";
+
+    *logging::out << logging::Logger::INFO_HIGH << "bcMZ ==> " << bcMZ->numberOfCellsPerLevel[0] << "\n";
+    *logging::out << logging::Logger::INFO_HIGH << "bcPZ ==> " << bcPZ->numberOfCellsPerLevel[0] << "\n";
+
+    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+    dataBase->setMesh( meshAdapter );
+
+    dataBase->setCommunicators( meshAdapter );
+
+    CudaUtility::printCudaMemoryUsage();
+
+    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+    Initializer::interpret(dataBase, [&] ( Vec3 cellCenter ) -> ConservedVariables
+    {
+        real U = 0.1;
+
+        real ULocal =   0.1 + U * sin( 2.0 * M_PI * cellCenter.x ) * cos( 2.0 * M_PI * cellCenter.y ) * cos( 2.0 * M_PI * cellCenter.z );
+        real VLocal =   0.1 - U * cos( 2.0 * M_PI * cellCenter.x ) * sin( 2.0 * M_PI * cellCenter.y ) * cos( 2.0 * M_PI * cellCenter.z );
+        real WLocal =   0.1;
+
+        real rho = 1.0;
+
+        real p0 = 0.5 * rho / parameters.lambdaRef;
+
+        real pLocal = p0 + rho * U * U / 16.0 * ( cos( 2.0 * M_PI * 2.0 * cellCenter.x ) + cos( 2.0 * M_PI * 2.0 * cellCenter.y ) ) * ( 2.0 + cos( 2.0 * M_PI * 2.0 * cellCenter.z ) );
+
+        real rhoLocal = 2.0 * pLocal * parameters.lambdaRef;
+
+        return toConservedVariables( PrimitiveVariables( rhoLocal, ULocal, VLocal, WLocal, parameters.lambdaRef ), parameters.K );
+    });
+
+    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+    dataBase->copyDataHostToDevice();
+
+    for( auto bc : dataBase->boundaryConditions ) 
+        for( uint level = 0; level < dataBase->numberOfLevels; level++ )
+            bc->runBoundaryConditionKernel( dataBase, parameters, level );
+
+    Initializer::initializeDataUpdate(dataBase);
+
+    dataBase->copyDataDeviceToHost();
+
+    writeVtkXML( dataBase, parameters, 0, path + simulationName + "_0" );
+
+    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+    const uint numberOfIterations = 100;
+
+    CupsAnalyzer cupsAnalyzer( dataBase, false, 30.0, true, numberOfIterations );
+
+    // CupsAnalyzer::run returns -1 on iterations without a report, so start from the same sentinel.
+    real CUPS = -1.0;
+
+    cupsAnalyzer.start();
+
+    for( uint iter = 1; iter <= numberOfIterations; iter++ )
+    {
+        TimeStepping::nestedTimeStep(dataBase, parameters, 0);
+
+        // Keep the last actual measurement instead of overwriting it with the -1 sentinel.
+        const real currentCUPS = cupsAnalyzer.run( iter, parameters.dt );
+        if( currentCUPS >= 0 ) CUPS = currentCUPS;
+    }
+
+    //////////////////////////////////////////////////////////////////////////
+
+    dataBase->copyDataDeviceToHost();
+
+    writeVtkXML( dataBase, parameters, 0, path + simulationName + "_final" );
+
+    //////////////////////////////////////////////////////////////////////////
+
+    // A non-negative crash cell index is reported as a crash; CUPS is returned regardless.
+    int crashCellIndex = dataBase->getCrashCellIndex();
+    if( crashCellIndex >= 0 )
+    {
+        *logging::out << logging::Logger::LOGGER_ERROR << "=================================================\n";
+        *logging::out << logging::Logger::LOGGER_ERROR << "=================================================\n";
+        *logging::out << logging::Logger::LOGGER_ERROR << "============= Simulation Crashed!!! =============\n";
+        *logging::out << logging::Logger::LOGGER_ERROR << "=================================================\n";
+        *logging::out << logging::Logger::LOGGER_ERROR << "=================================================\n";
+    }
+
+    return CUPS;
+}
+
+// Entry point: runs performanceTest for a list of grid resolutions, appends one "nx CUPS"
+// line per resolution to <path>SingleGPU.dat and writes one log file per resolution.
+int main( int argc, char* argv[])
+{
+#ifdef _WIN32
+    std::string path( "F:/Work/Computations/out/SingleGPU/" );
+#else
+    //std::string path( "/home/stephan/Computations/out/" );
+    std::string path( "out/" );
+#endif
+
+    // Declared outside the try block: the logger is handed its address, so it must not be destroyed by
+    // stack unwinding before the catch handlers write to logging::out (assumes the logger keeps the raw pointer).
+    std::ofstream logFile;
+
+    try
+    {
+        logging::Logger::setDebugLevel(logging::Logger::Level::INFO_LOW);
+        logging::Logger::timeStamp(logging::Logger::ENABLE);
+
+        std::string simulationName ( "SingleGPU" );
+
+        std::ofstream file;
+        file.open( path + simulationName + ".dat" );
+        if( !file.is_open() ) // e.g. missing output directory; the benchmark still runs, only the summary is lost
+            std::cerr << "Warning: could not open " << path + simulationName + ".dat" << " for writing\n";
+
+        std::vector<uint> nxList = {32,64,128,256,384};
+
+        for( auto nx : nxList )
+        {
+            logging::Logger::addStream(&std::cout);
+
+            logFile.open( path + simulationName + "_nx_" + std::to_string(nx) + ".log" );
+            logging::Logger::addStream(&logFile);
+
+            CudaUtility::setCudaDevice( 0 );
+
+            //////////////////////////////////////////////////////////////////////////
+
+            if( sizeof(real) == 4 )
+                *logging::out << logging::Logger::INFO_HIGH << "Using Single Precision\n";
+            else
+                *logging::out << logging::Logger::INFO_HIGH << "Using Double Precision\n";
+
+            real CUPS = performanceTest( path, simulationName + "_nx_" + std::to_string(nx), nx );
+
+            file << std::setw(5) << nx <<std::setw(20) << CUPS << std::endl;
+
+            logFile.close();
+            logging::Logger::resetStreams();
+        }
+
+        file.close();
+    }
+    // std::bad_alloc derives from std::exception, so it has to be caught first to be reachable.
+    catch (const std::bad_alloc& e)
+    {
+        *logging::out << logging::Logger::LOGGER_ERROR << "Bad Alloc:" << e.what() << "\n";
+    }
+    catch (const std::exception& e)
+    {
+        *logging::out << logging::Logger::LOGGER_ERROR << e.what() << "\n";
+    }
+    catch (...)
+    {
+        *logging::out << logging::Logger::LOGGER_ERROR << "Unknown exception!\n";
+    }
+
+    return 0;
+}
diff --git a/targets/apps/GKS/SingleGPU/package.include b/targets/apps/GKS/SingleGPU/package.include
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391