From 5c036235bb8a51c475871d89af282210bd65ccfe Mon Sep 17 00:00:00 2001 From: "LEGOLAS\\lenz" <lenz@irmb.tu-bs.de> Date: Tue, 29 Oct 2019 13:41:41 +0100 Subject: [PATCH] New target for SingleGPU performance testing --- CMakeLists.txt | 1 + src/GksGpu/Analyzer/CupsAnalyzer.cpp | 8 +- src/GksGpu/Analyzer/CupsAnalyzer.h | 2 +- targets/apps/GKS/MultiGPU_nD/MultiGPU_nD.cpp | 8 +- .../apps/GKS/SingleGPU/3rdPartyLinking.cmake | 11 + targets/apps/GKS/SingleGPU/CMakeLists.txt | 19 + targets/apps/GKS/SingleGPU/CMakePackage.cmake | 9 + targets/apps/GKS/SingleGPU/SingleGPU.cpp | 333 ++++++++++++++++++ targets/apps/GKS/SingleGPU/package.include | 0 9 files changed, 384 insertions(+), 7 deletions(-) create mode 100644 targets/apps/GKS/SingleGPU/3rdPartyLinking.cmake create mode 100644 targets/apps/GKS/SingleGPU/CMakeLists.txt create mode 100644 targets/apps/GKS/SingleGPU/CMakePackage.cmake create mode 100644 targets/apps/GKS/SingleGPU/SingleGPU.cpp create mode 100644 targets/apps/GKS/SingleGPU/package.include diff --git a/CMakeLists.txt b/CMakeLists.txt index 7f3cf04ab..1909f964b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -182,6 +182,7 @@ IF (HULC.BUILD_VF_GKS) add_subdirectory(targets/apps/GKS/MultiGPU) add_subdirectory(targets/apps/GKS/MultiGPU_nD) + add_subdirectory(targets/apps/GKS/SingleGPU) ELSE() MESSAGE( STATUS "exclude Virtual Fluids GKS." 
) ENDIF() diff --git a/src/GksGpu/Analyzer/CupsAnalyzer.cpp b/src/GksGpu/Analyzer/CupsAnalyzer.cpp index 22614c811..86dc316a2 100644 --- a/src/GksGpu/Analyzer/CupsAnalyzer.cpp +++ b/src/GksGpu/Analyzer/CupsAnalyzer.cpp @@ -44,18 +44,20 @@ void CupsAnalyzer::restart() this->timerRestart->start(); } -void CupsAnalyzer::run( uint iter, real dt ) +real CupsAnalyzer::run( uint iter, real dt ) { real currentRuntime = this->timer->getCurrentRuntimeInSeconds(); real currentRuntimeSinceRestart = this->timerRestart->getCurrentRuntimeInSeconds(); + real CUPS = -1.0; + this->counter++; if( checkOutputPerTime(currentRuntime) || checkOutputPerIter(iter) ) { unsigned long long numberOfCellUpdates = this->numberOfCellUpdatesPerTimeStep * (unsigned long long)counter; - real CUPS = real(numberOfCellUpdates) / currentRuntimeSinceRestart; + CUPS = real(numberOfCellUpdates) / currentRuntimeSinceRestart; this->printCups( iter, iter * dt, currentRuntime, CUPS ); @@ -66,6 +68,8 @@ void CupsAnalyzer::run( uint iter, real dt ) { outputPerTimeCounter++; } + + return CUPS; } bool CupsAnalyzer::checkOutputPerTime(real currentRuntime) diff --git a/src/GksGpu/Analyzer/CupsAnalyzer.h b/src/GksGpu/Analyzer/CupsAnalyzer.h index 3fe5ae3f1..49651a221 100644 --- a/src/GksGpu/Analyzer/CupsAnalyzer.h +++ b/src/GksGpu/Analyzer/CupsAnalyzer.h @@ -40,7 +40,7 @@ public: void restart(); - void run( uint iter, real dt ); + real run( uint iter, real dt ); private: diff --git a/targets/apps/GKS/MultiGPU_nD/MultiGPU_nD.cpp b/targets/apps/GKS/MultiGPU_nD/MultiGPU_nD.cpp index a9309a2cf..812e9a335 100644 --- a/targets/apps/GKS/MultiGPU_nD/MultiGPU_nD.cpp +++ b/targets/apps/GKS/MultiGPU_nD/MultiGPU_nD.cpp @@ -350,7 +350,7 @@ void performanceTest( std::string path, std::string simulationName, uint decompo const uint numberOfIterations = 10000; - CupsAnalyzer cupsAnalyzer( dataBase, true, 30.0, true, numberOfIterations ); + CupsAnalyzer cupsAnalyzer( dataBase, false, 30.0, true, numberOfIterations ); 
MPI_Barrier(MPI_COMM_WORLD); @@ -359,9 +359,9 @@ void performanceTest( std::string path, std::string simulationName, uint decompo for( uint iter = 1; iter <= numberOfIterations; iter++ ) { TimeStepping::nestedTimeStep(dataBase, parameters, 0); - } - cupsAnalyzer.run( numberOfIterations, parameters.dt ); + cupsAnalyzer.run( iter, parameters.dt ); + } ////////////////////////////////////////////////////////////////////////// @@ -411,7 +411,7 @@ int main( int argc, char* argv[]) ////////////////////////////////////////////////////////////////////////// bool strongScaling = false; - uint nx = 64; + uint nx = 128; uint decompositionDimension = 3; if( argc > 1 ) nx = atoi( argv[1] ); diff --git a/targets/apps/GKS/SingleGPU/3rdPartyLinking.cmake b/targets/apps/GKS/SingleGPU/3rdPartyLinking.cmake new file mode 100644 index 000000000..72c7afc60 --- /dev/null +++ b/targets/apps/GKS/SingleGPU/3rdPartyLinking.cmake @@ -0,0 +1,11 @@ +include (${CMAKE_SOURCE_DIR}/${cmakeMacroPath}/MPI/Link.cmake) +linkMPI(${targetName}) +include (${CMAKE_SOURCE_DIR}/${cmakeMacroPath}/Cuda/Link.cmake) +linkCuda(${targetName}) +#include (${CMAKE_SOURCE_DIR}/${cmakeMacroPath}/Metis/Link.cmake) +#linkMetis(${targetName}) + +#if(HULC.BUILD_JSONCPP) +# include (${CMAKE_SOURCE_DIR}/${cmakeMacroPath}/JsonCpp/Link.cmake) +# linkJsonCpp(${targetName}) +#endif() diff --git a/targets/apps/GKS/SingleGPU/CMakeLists.txt b/targets/apps/GKS/SingleGPU/CMakeLists.txt new file mode 100644 index 000000000..d40431017 --- /dev/null +++ b/targets/apps/GKS/SingleGPU/CMakeLists.txt @@ -0,0 +1,19 @@ +setTargetNameToFolderName(${CMAKE_CURRENT_LIST_DIR}) + +set(linkDirectories "") +set(libsToLink Core GridGenerator GksMeshAdapter GksVtkAdapter GksGpu) +set(includeDirectories "${CMAKE_SOURCE_DIR}/src" + "${CMAKE_SOURCE_DIR}/src/Core" + "${CMAKE_SOURCE_DIR}/src/GridGenerator" + "${CMAKE_SOURCE_DIR}/src/GksMeshAdapter" + "${CMAKE_SOURCE_DIR}/src/GksVtkAdapter" + "${CMAKE_SOURCE_DIR}/src/GksGpu") + +#glob files and save in 
MY_SRCS +include(CMakePackage.cmake) + +buildExe(${targetName} "${MY_SRCS}" "${linkDirectories}" "${libsToLink}" "${includeDirectories}") +groupTarget(${targetName} ${gksAppFolder}) + +# Specify the linking to 3rdParty libs +include(3rdPartyLinking.cmake) diff --git a/targets/apps/GKS/SingleGPU/CMakePackage.cmake b/targets/apps/GKS/SingleGPU/CMakePackage.cmake new file mode 100644 index 000000000..5d39e3804 --- /dev/null +++ b/targets/apps/GKS/SingleGPU/CMakePackage.cmake @@ -0,0 +1,9 @@ +#FILE ENDINGS +resetFileEndingsToCollect() +addCAndCPPFileTypes() +addFileEndingToCollect("*.cu") +addFileEndingToCollect("*.cuh") + +#GLOB SOURCE FILES IN MY_SRCS +unset(MY_SRCS) +includeRecursiveAllFilesFrom(${targetName} ${CMAKE_CURRENT_LIST_DIR}) \ No newline at end of file diff --git a/targets/apps/GKS/SingleGPU/SingleGPU.cpp b/targets/apps/GKS/SingleGPU/SingleGPU.cpp new file mode 100644 index 000000000..10a07edbc --- /dev/null +++ b/targets/apps/GKS/SingleGPU/SingleGPU.cpp @@ -0,0 +1,333 @@ +//#define MPI_LOGGING + +#define _USE_MATH_DEFINES +#include <math.h> +#include <string> +#include <iostream> +#include <iomanip> +#include <exception> +#include <fstream> +#include <sstream> +#include <memory> + +#include "Core/Timer/Timer.h" +#include "Core/PointerDefinitions.h" +#include "Core/DataTypes.h" +#include "Core/VectorTypes.h" +#include "Core/Logger/Logger.h" + +#include "GridGenerator/geometries/Cuboid/Cuboid.h" +#include "GridGenerator/geometries/Sphere/Sphere.h" +#include "GridGenerator/geometries/VerticalCylinder/VerticalCylinder.h" +#include "GridGenerator/geometries/Conglomerate/Conglomerate.h" + +#include "GridGenerator/grid/GridBuilder/LevelGridBuilder.h" +#include "GridGenerator/grid/GridBuilder/MultipleGridBuilder.h" +#include "GridGenerator/grid/GridFactory.h" +#include "GridGenerator/geometries/BoundingBox/BoundingBox.h" +#include "GridGenerator/utilities/communication.h" + +#include "GksMeshAdapter/GksMeshAdapter.h" + +#include "GksVtkAdapter/VTKInterface.h" + 
+#include "GksGpu/DataBase/DataBase.h"
+#include "GksGpu/Parameters/Parameters.h"
+#include "GksGpu/Initializer/Initializer.h"
+
+#include "GksGpu/FlowStateData/FlowStateData.cuh"
+#include "GksGpu/FlowStateData/FlowStateDataConversion.cuh"
+
+#include "GksGpu/BoundaryConditions/BoundaryCondition.h"
+#include "GksGpu/BoundaryConditions/IsothermalWall.h"
+#include "GksGpu/BoundaryConditions/Periodic.h"
+#include "GksGpu/BoundaryConditions/Pressure.h"
+#include "GksGpu/BoundaryConditions/AdiabaticWall.h"
+
+#include "GksGpu/Communication/Communicator.h"
+
+#include "GksGpu/TimeStepping/NestedTimeStep.h"
+
+#include "GksGpu/Analyzer/CupsAnalyzer.h"
+#include "GksGpu/Analyzer/ConvergenceAnalyzer.h"
+#include "GksGpu/Analyzer/TurbulenceAnalyzer.h"
+
+#include "GksGpu/CudaUtility/CudaUtility.h"
+#include "GksGpu/Communication/MpiUtility.h"
+
+// Benchmark kernel: sets up a fully periodic cube (edge length 1, grid spacing 1/nx), runs 100 time steps and writes
+// VTK output to path + simulationName + "_0" / "_final". Returns the value of the last CupsAnalyzer::run() call.
+real performanceTest( std::string path, std::string simulationName, uint nx )
+{
+    // Domain: cube with edge length L centered at the origin; dx is the grid spacing for nx cells per edge.
+
+    real L = 1.0;
+
+    real LX = L;    // LX, LY and LZ are not referenced again in this function
+    real LY = L;
+    real LZ = L;
+
+    real dx = L / real(nx);
+
+    // Solver parameters used for the benchmark run.
+
+    Parameters parameters;
+
+    parameters.K = 0;
+    parameters.Pr = 1;
+    parameters.mu = 0.01;
+
+    parameters.force.x = 0;
+    parameters.force.y = 0;
+    parameters.force.z = 0;
+
+    parameters.dt = 0.0001;
+    parameters.dx = dx;
+
+    parameters.lambdaRef = 1.0e-2;
+
+    parameters.forcingSchemeIdx = 0;
+
+    parameters.enableReaction = false;
+
+    // Grid generation with the CPU grid strategy and the POINT_IN_OBJECT discretization method selected.
+
+    auto gridFactory = GridFactory::make();
+    gridFactory->setGridStrategy(Device::CPU);
+    gridFactory->setTriangularMeshDiscretizationMethod(TriangularMeshDiscretizationMethod::POINT_IN_OBJECT);
+
+    auto gridBuilder = MultipleGridBuilder::makeShared(gridFactory);
+
+    // A single coarse grid spanning [-L/2, L/2] in each direction with spacing dx; no refinement is added here.
+
+
+    gridBuilder->addCoarseGrid( - 0.5*L, - 0.5*L, - 0.5*L,
+                                  0.5*L,   0.5*L,   0.5*L, dx);
+
+    // Periodic in x, y and z.
+
+    gridBuilder->setPeriodicBoundaryCondition(true,true,true);
+
+    gridBuilder->buildGrids(GKS, false);
+
+    // Hand the built grid to the GKS mesh adapter and let it find the periodic boundary neighbors.
+
+    GksMeshAdapter meshAdapter( gridBuilder );
+
+    meshAdapter.inputGrid();
+
+    meshAdapter.findPeriodicBoundaryNeighbors();
+
+    // The data base is created with the type string "GPU".
+
+    auto dataBase = std::make_shared<DataBase>( "GPU" );
+
+    //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+    // Boundary conditions: one Periodic object per face of the cube. Each predicate selects the cells whose center
+    // lies outside the cube on the respective side (x faces first, then the y and z faces below).
+    //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+    SPtr<BoundaryCondition> bcMX = std::make_shared<Periodic>( dataBase );
+    SPtr<BoundaryCondition> bcPX = std::make_shared<Periodic>( dataBase );
+
+    bcMX->findBoundaryCells( meshAdapter, true, [&](Vec3 center){ return center.x < -0.5*L; } );
+    bcPX->findBoundaryCells( meshAdapter, true, [&](Vec3 center){ return center.x > 0.5*L; } );
+
+    // y faces
+
+    SPtr<BoundaryCondition> bcMY = std::make_shared<Periodic>( dataBase );
+    SPtr<BoundaryCondition> bcPY = std::make_shared<Periodic>( dataBase );
+
+    bcMY->findBoundaryCells( meshAdapter, true, [&](Vec3 center){ return center.y < -0.5*L; } );
+    bcPY->findBoundaryCells( meshAdapter, true, [&](Vec3 center){ return center.y > 0.5*L; } );
+
+    // z faces
+
+    SPtr<BoundaryCondition> bcMZ = std::make_shared<Periodic>( dataBase );
+    SPtr<BoundaryCondition> bcPZ = std::make_shared<Periodic>( dataBase );
+
+    bcMZ->findBoundaryCells( meshAdapter, true, [&](Vec3 center){ return center.z < -0.5*L; } );
+    bcPZ->findBoundaryCells( meshAdapter, true, [&](Vec3 center){ return center.z > 0.5*L; } );
+
+    // Register all six boundary conditions with the data base.
+
+    dataBase->boundaryConditions.push_back( bcMX );
+    dataBase->boundaryConditions.push_back( bcPX );
+
+    dataBase->boundaryConditions.push_back( bcMY );
+    dataBase->boundaryConditions.push_back( bcPY );
+
+    dataBase->boundaryConditions.push_back( bcMZ );
+    dataBase->boundaryConditions.push_back( bcPZ );
+
+    // Log how many boundary conditions exist and the level-0 cell count of each one.
+
+    *logging::out << logging::Logger::INFO_HIGH << "NumberOfBoundaryConditions = " << (int)dataBase->boundaryConditions.size() << "\n";
+
+    *logging::out << logging::Logger::INFO_HIGH << "bcMX ==> " << bcMX->numberOfCellsPerLevel[0] << "\n";
+    *logging::out << logging::Logger::INFO_HIGH << "bcPX ==> " << bcPX->numberOfCellsPerLevel[0] << "\n";
+
+    *logging::out << logging::Logger::INFO_HIGH << "bcMY ==> " << bcMY->numberOfCellsPerLevel[0] << "\n";
+    *logging::out << logging::Logger::INFO_HIGH << "bcPY ==> " << bcPY->numberOfCellsPerLevel[0] << "\n";
+
+    *logging::out << logging::Logger::INFO_HIGH << "bcMZ ==> " << bcMZ->numberOfCellsPerLevel[0] << "\n";
+    *logging::out << logging::Logger::INFO_HIGH << "bcPZ ==> " << bcPZ->numberOfCellsPerLevel[0] << "\n";
+
+    //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+    // Pass the mesh adapter to the data base (mesh and communicators) and print the CUDA memory usage.
+    // The boundary conditions above have to be registered before this point in the original statement order.
+    //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+    dataBase->setMesh( meshAdapter );
+
+    dataBase->setCommunicators( meshAdapter );
+
+    CudaUtility::printCudaMemoryUsage();
+
+    // Initial condition per cell center: sinusoidal velocity perturbation of amplitude U on a uniform (0.1, 0.1, 0.1) velocity; density follows from the pressure as rho = 2 p lambdaRef.
+
+    Initializer::interpret(dataBase, [&] ( Vec3 cellCenter ) -> ConservedVariables
+    {
+        real U = 0.1;
+
+        real ULocal = 0.1 + U * sin( 2.0 * M_PI * cellCenter.x ) * cos( 2.0 * M_PI * cellCenter.y ) * cos( 2.0 * M_PI * cellCenter.z );
+        real VLocal = 0.1 - U * cos( 2.0 * M_PI * cellCenter.x ) * sin( 2.0 * M_PI * cellCenter.y ) * cos( 2.0 * M_PI * cellCenter.z );
+        real WLocal = 0.1;
+
+        real rho = 1.0;
+
+        real p0 = 0.5 * rho / parameters.lambdaRef;
+
+        real pLocal = p0 + rho * U * U / 16.0 * ( cos( 2.0 * M_PI * 2.0 * cellCenter.x ) + cos( 2.0 * M_PI * 2.0 * cellCenter.y ) ) * ( 2.0 + cos( 2.0 * M_PI * 2.0 * cellCenter.z ) );
+
+        real rhoLocal = 2.0 * pLocal * parameters.lambdaRef;
+
+        return toConservedVariables( PrimitiveVariables( rhoLocal, ULocal, VLocal, WLocal, parameters.lambdaRef ), parameters.K );
+    });
+
+    // Upload the initial state, run each boundary condition kernel once on each level, finish the initialization and write the initial state to VTK.
+
+    dataBase->copyDataHostToDevice();
+
+    for( const auto& bc : dataBase->boundaryConditions )    // bind by reference: no shared pointer copy per iteration
+        for( uint level = 0; level < dataBase->numberOfLevels; level++ )
+            bc->runBoundaryConditionKernel( dataBase, parameters, level );
+
+    Initializer::initializeDataUpdate(dataBase);
+
+    dataBase->copyDataDeviceToHost();
+
+    writeVtkXML( dataBase, parameters, 0, path + simulationName + "_0" );
+
+    //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+    // Benchmark loop. CupsAnalyzer::run() (as changed earlier in this patch) returns -1 unless that call produced
+    // output, so CUPS ends up holding the result of the final call only.
+    //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+    const uint numberOfIterations = 100;
+
+    CupsAnalyzer cupsAnalyzer( dataBase, false, 30.0, true, numberOfIterations );    // presumably: output every numberOfIterations steps - confirm against the constructor
+
+    real CUPS = 0;
+
+    cupsAnalyzer.start();
+
+    for( uint iter = 1; iter <= numberOfIterations; iter++ )
+    {
+        TimeStepping::nestedTimeStep(dataBase, parameters, 0);
+
+        CUPS = cupsAnalyzer.run( iter, parameters.dt );
+    }
+
+    // Download the final state and write it to VTK.
+
+    dataBase->copyDataDeviceToHost();
+
+    writeVtkXML( dataBase, parameters, 0, path + simulationName + "_final" );
+
+    // Print an error banner if the data base reports a crash cell (index >= 0).
+
+    int crashCellIndex = dataBase->getCrashCellIndex();
+    if( crashCellIndex >= 0 )
+    {
+        *logging::out << logging::Logger::LOGGER_ERROR << "=================================================\n";
+        *logging::out << logging::Logger::LOGGER_ERROR << "=================================================\n";
+        *logging::out << logging::Logger::LOGGER_ERROR << "============= Simulation Crashed!!! =============\n";
+        *logging::out << logging::Logger::LOGGER_ERROR << "=================================================\n";
+        *logging::out << logging::Logger::LOGGER_ERROR << "=================================================\n";
+    }
+
+    return CUPS;
+}
+
+int main( int argc, char* argv[])
+{
+    // Benchmark driver. The output directory is hard-coded and platform dependent.
+
+#ifdef _WIN32
+    std::string path( "F:/Work/Computations/out/SingleGPU/" );
+#else
+    //std::string path( "/home/stephan/Computations/out/" );
+    std::string path( "out/" );
+#endif
+
+    // Run performanceTest() for every resolution in nxList; each run appends one "nx CUPS" line to <path>SingleGPU.dat.
+
+    try
+    {
+        logging::Logger::setDebugLevel(logging::Logger::Level::INFO_LOW);
+        logging::Logger::timeStamp(logging::Logger::ENABLE);
+
+        std::string simulationName ( "SingleGPU" );
+
+        std::ofstream file;
+        file.open( path + simulationName + ".dat" );
+
+        std::vector<uint> nxList = {32,64,128,256,384};
+
+        for( auto nx : nxList )
+        {
+            logging::Logger::addStream(&std::cout);
+            // NOTE(review): logFile is registered by address but only lives for this iteration; if performanceTest() throws, the handlers below may log through a stale pointer - confirm Logger::addStream semantics.
+            std::ofstream logFile( path + simulationName + "_nx_" + std::to_string(nx) + ".log" );
+            logging::Logger::addStream(&logFile);
+
+            CudaUtility::setCudaDevice( 0 );
+
+            // Log the floating point precision in use, then run the benchmark for this resolution.
+
+            if( sizeof(real) == 4 )
+                *logging::out << logging::Logger::INFO_HIGH << "Using Single Precison\n";
+            else
+                *logging::out << logging::Logger::INFO_HIGH << "Using Double Precision\n";
+
+            real CUPS = performanceTest( path, simulationName + "_nx_" + std::to_string(nx), nx );
+
+            file << std::setw(5) << nx <<std::setw(20) << CUPS << std::endl;
+
+            logFile.close();
+
+            logging::Logger::resetStreams();
+        }
+
+        file.close();
+    }
+    catch (const std::bad_alloc& e)    // must precede the std::exception handler: std::bad_alloc derives from std::exception
+    {
+        *logging::out << logging::Logger::LOGGER_ERROR << "Bad Alloc:" << e.what() << "\n";
+    }
+    catch (const std::exception& e)
+    {
+        *logging::out << logging::Logger::LOGGER_ERROR << e.what() << "\n";
+    }
+    catch (...)
+    {
+        *logging::out << logging::Logger::LOGGER_ERROR << "Unknown exception!\n";
+    }
+
+    //////////////////////////////////////////////////////////////////////////
+    // The exit code is 0 even if an exception was caught and logged above.
+    //////////////////////////////////////////////////////////////////////////
+
+    return 0;
+}
diff --git a/targets/apps/GKS/SingleGPU/package.include b/targets/apps/GKS/SingleGPU/package.include
new file mode 100644
index 000000000..e69de29bb
-- 
GitLab