From 48a296c2511f04a31bd1e285a2e01b71e144598b Mon Sep 17 00:00:00 2001 From: "LEGOLAS\\lenz" <lenz@irmb.tu-bs.de> Date: Thu, 13 Jun 2019 09:46:08 +0200 Subject: [PATCH] nnew MultiGPU Performance Test target --- CMakeLists.txt | 1 + targets/apps/GKS/Flame7cm/Flame7cm.cpp | 10 +- .../GKS/MultiGPU_2D/3rdPartyLinking.cmake | 11 + targets/apps/GKS/MultiGPU_2D/CMakeLists.txt | 19 + .../apps/GKS/MultiGPU_2D/CMakePackage.cmake | 9 + targets/apps/GKS/MultiGPU_2D/MultiGPU_2D.cpp | 476 ++++++++++++++++++ targets/apps/GKS/MultiGPU_2D/package.include | 0 targets/apps/GKS/PoolFire/PoolFire.cpp | 6 +- 8 files changed, 524 insertions(+), 8 deletions(-) create mode 100644 targets/apps/GKS/MultiGPU_2D/3rdPartyLinking.cmake create mode 100644 targets/apps/GKS/MultiGPU_2D/CMakeLists.txt create mode 100644 targets/apps/GKS/MultiGPU_2D/CMakePackage.cmake create mode 100644 targets/apps/GKS/MultiGPU_2D/MultiGPU_2D.cpp create mode 100644 targets/apps/GKS/MultiGPU_2D/package.include diff --git a/CMakeLists.txt b/CMakeLists.txt index 666d84826..408a8b255 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -172,6 +172,7 @@ IF (HULC.BUILD_VF_GKS) add_subdirectory(targets/apps/GKS/Candle) add_subdirectory(targets/apps/GKS/MultiGPU) + add_subdirectory(targets/apps/GKS/MultiGPU_2D) ELSE() MESSAGE( STATUS "exclude Virtual Fluids GKS." ) ENDIF() diff --git a/targets/apps/GKS/Flame7cm/Flame7cm.cpp b/targets/apps/GKS/Flame7cm/Flame7cm.cpp index 2972fcaf9..fe7116093 100644 --- a/targets/apps/GKS/Flame7cm/Flame7cm.cpp +++ b/targets/apps/GKS/Flame7cm/Flame7cm.cpp @@ -187,8 +187,8 @@ void thermalCavity( std::string path, std::string simulationName ) //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - SPtr<BoundaryCondition> bcMX = std::make_shared<Open>( dataBase, prim ); - SPtr<BoundaryCondition> bcPX = std::make_shared<Open>( dataBase, prim ); + SPtr<BoundaryCondition> bcMX = std::make_shared<Open>( dataBase, prim, 10.0 ); + SPtr<BoundaryCondition> bcPX = std::make_shared<Open>( dataBase, prim, 10.0 ); //SPtr<BoundaryCondition> bcMX = std::make_shared<AdiabaticWall>( dataBase, Vec3(0, 0, 0), false ); //SPtr<BoundaryCondition> bcPX = std::make_shared<AdiabaticWall>( dataBase, Vec3(0, 0, 0), false ); @@ -208,8 +208,8 @@ void thermalCavity( std::string path, std::string simulationName ) if( threeDimensional ) { - bcMY = std::make_shared<Open>( dataBase, prim ); - bcPY = std::make_shared<Open>( dataBase, prim ); + bcMY = std::make_shared<Open>( dataBase, prim, 10.0 ); + bcPY = std::make_shared<Open>( dataBase, prim, 10.0 ); bcMY->findBoundaryCells( meshAdapter, false, [&](Vec3 center){ return center.y < -0.5*L; } ); bcPY->findBoundaryCells( meshAdapter, false, [&](Vec3 center){ return center.y > 0.5*L; } ); @@ -230,7 +230,7 @@ void thermalCavity( std::string path, std::string simulationName ) //SPtr<BoundaryCondition> bcMZ = std::make_shared<InflowComplete>( dataBase, PrimitiveVariables(rho, 0.0, 0.0, 0.0, prim.lambda, 0.0, 0.0) ); //SPtr<BoundaryCondition> bcMZ = std::make_shared<Open>( dataBase ); - SPtr<BoundaryCondition> bcPZ = std::make_shared<Open>( dataBase, prim ); + SPtr<BoundaryCondition> bcPZ = std::make_shared<Open>( dataBase, prim, 10.0 ); bcMZ->findBoundaryCells( meshAdapter, true, [&](Vec3 center){ return center.z < 0.0 /*&& std::sqrt(center.x*center.x + center.y*center.y) >= 0.5*0.071*/; } ); bcPZ->findBoundaryCells( meshAdapter, true, [&](Vec3 center){ return center.z > H ; } ); diff --git a/targets/apps/GKS/MultiGPU_2D/3rdPartyLinking.cmake b/targets/apps/GKS/MultiGPU_2D/3rdPartyLinking.cmake new file mode 100644 index 000000000..72c7afc60 --- /dev/null +++ b/targets/apps/GKS/MultiGPU_2D/3rdPartyLinking.cmake @@ -0,0 +1,11 @@ +include (${CMAKE_SOURCE_DIR}/${cmakeMacroPath}/MPI/Link.cmake) +linkMPI(${targetName}) +include (${CMAKE_SOURCE_DIR}/${cmakeMacroPath}/Cuda/Link.cmake) +linkCuda(${targetName}) +#include (${CMAKE_SOURCE_DIR}/${cmakeMacroPath}/Metis/Link.cmake) +#linkMetis(${targetName}) + +#if(HULC.BUILD_JSONCPP) +# include (${CMAKE_SOUR#CE_DIR}/${cmakeMacroPath}/JsonCpp/Link.cmake) +# linkJsonCpp(${targetName}) +#endif() diff --git a/targets/apps/GKS/MultiGPU_2D/CMakeLists.txt b/targets/apps/GKS/MultiGPU_2D/CMakeLists.txt new file mode 100644 index 000000000..d40431017 --- /dev/null +++ b/targets/apps/GKS/MultiGPU_2D/CMakeLists.txt @@ -0,0 +1,19 @@ +setTargetNameToFolderName(${CMAKE_CURRENT_LIST_DIR}) + +set(linkDirectories "") +set(libsToLink Core GridGenerator GksMeshAdapter GksVtkAdapter GksGpu) +set(includeDirectories "${CMAKE_SOURCE_DIR}/src" + "${CMAKE_SOURCE_DIR}/src/Core" + "${CMAKE_SOURCE_DIR}/src/GridGenerator" + "${CMAKE_SOURCE_DIR}/src/GksMeshAdapter" + "${CMAKE_SOURCE_DIR}/src/GksVtkAdapter" + "${CMAKE_SOURCE_DIR}/src/GksGpu") + +#glob files and save in MY_SRCS +include(CMakePackage.cmake) + +buildExe(${targetName} "${MY_SRCS}" "${linkDirectories}" "${libsToLink}" "${includeDirectories}") +groupTarget(${targetName} ${gksAppFolder}) + +# Specify the linking to 3rdParty libs +include(3rdPartyLinking.cmake) diff --git a/targets/apps/GKS/MultiGPU_2D/CMakePackage.cmake b/targets/apps/GKS/MultiGPU_2D/CMakePackage.cmake new file mode 100644 index 000000000..5d39e3804 --- /dev/null +++ b/targets/apps/GKS/MultiGPU_2D/CMakePackage.cmake @@ -0,0 +1,9 @@ +#FILE ENDINGS +resetFileEndingsToCollect() +addCAndCPPFileTypes() +addFileEndingToCollect("*.cu") +addFileEndingToCollect("*.cuh") + +#GLOB SOURCE FILES IN MY_SRCS +unset(MY_SRCS) +includeRecursiveAllFilesFrom(${targetName} ${CMAKE_CURRENT_LIST_DIR}) \ No newline at end of file diff --git a/targets/apps/GKS/MultiGPU_2D/MultiGPU_2D.cpp b/targets/apps/GKS/MultiGPU_2D/MultiGPU_2D.cpp new file mode 100644 index 000000000..0288c6e50 --- /dev/null +++ b/targets/apps/GKS/MultiGPU_2D/MultiGPU_2D.cpp @@ -0,0 +1,476 @@ +//#define MPI_LOGGING + +#define _USE_MATH_DEFINES +#include <math.h> +#include <string> +#include <iostream> +#include <exception> +#include <fstream> +#include <sstream> +#include <memory> + +#include "Core/Timer/Timer.h" +#include "Core/PointerDefinitions.h" +#include "Core/DataTypes.h" +#include "Core/VectorTypes.h" +#include "Core/Logger/Logger.h" + +#include "GridGenerator/geometries/Cuboid/Cuboid.h" +#include "GridGenerator/geometries/Sphere/Sphere.h" +#include "GridGenerator/geometries/VerticalCylinder/VerticalCylinder.h" +#include "GridGenerator/geometries/Conglomerate/Conglomerate.h" + +#include "GridGenerator/grid/GridBuilder/LevelGridBuilder.h" +#include "GridGenerator/grid/GridBuilder/MultipleGridBuilder.h" +#include "GridGenerator/grid/GridFactory.h" +#include "GridGenerator/geometries/BoundingBox/BoundingBox.h" +#include "GridGenerator/utilities/communication.h" + +#include "GksMeshAdapter/GksMeshAdapter.h" + +#include "GksVtkAdapter/VTKInterface.h" + +#include "GksGpu/DataBase/DataBase.h" +#include "GksGpu/Parameters/Parameters.h" +#include "GksGpu/Initializer/Initializer.h" + +#include "GksGpu/FlowStateData/FlowStateData.cuh" +#include "GksGpu/FlowStateData/FlowStateDataConversion.cuh" + +#include "GksGpu/BoundaryConditions/BoundaryCondition.h" +#include "GksGpu/BoundaryConditions/IsothermalWall.h" +#include "GksGpu/BoundaryConditions/Periodic.h" +#include "GksGpu/BoundaryConditions/Pressure.h" +#include "GksGpu/BoundaryConditions/AdiabaticWall.h" + +#include "GksGpu/Communication/Communicator.h" + +#include "GksGpu/TimeStepping/NestedTimeStep.h" + +#include "GksGpu/Analyzer/CupsAnalyzer.h" +#include "GksGpu/Analyzer/ConvergenceAnalyzer.h" +#include "GksGpu/Analyzer/TurbulenceAnalyzer.h" + +#include "GksGpu/CudaUtility/CudaUtility.h" +#include "GksGpu/Communication/MpiUtility.h" + +////////////////////////////////////////////////////////////////////////// + +void performanceTest( std::string path, std::string simulationName, uint decompositionDimension, uint nx, bool strongScaling ) +{ + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + + int rank = 0; + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + + int mpiWorldSize = 1; + MPI_Comm_size(MPI_COMM_WORLD, &mpiWorldSize); + + //CudaUtility::setCudaDevice(rank % devicesPerNode); + + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + + int sideLength, rankX, rankY, rankZ; + + if( decompositionDimension == 2 ) + { + if (mpiWorldSize == 1) sideLength = 1; + else if (mpiWorldSize == 4) sideLength = 2; + else if (mpiWorldSize == 16) sideLength = 4; + + rankX = rank % sideLength; + rankY = rank / sideLength; + rankZ = 0; + } + else if( decompositionDimension == 3 ) + { + if (mpiWorldSize == 1) sideLength = 1; + else if (mpiWorldSize == 8) sideLength = 2; + + rankX = rank % sideLength; + rankY = ( rank % ( sideLength * sideLength ) ) / sideLength; + rankZ = rank / ( sideLength * sideLength ); + } + + *logging::out << logging::Logger::INFO_HIGH << sideLength << " " << rankX << " " << rankY << " " << rankZ << "\n"; + + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + + real L = 1.0; + + real LX = L; + real LY = L; + real LZ = L; + + real dx = L / real(nx); + + if( strongScaling ) + { + if( decompositionDimension == 2 ) + { + LX /= double(sideLength); + LY /= double(sideLength); + } + else if( decompositionDimension == 3 ) + { + LX /= double(sideLength); + LY /= double(sideLength); + LZ /= double(sideLength); + } + } + + ////////////////////////////////////////////////////////////////////////// + + Parameters parameters; + + parameters.K = 0; + parameters.Pr = 1; + parameters.mu = 0.01; + + parameters.force.x = 0.1; + parameters.force.y = 0; + parameters.force.z = 0; + + parameters.dt = 0.0001; + parameters.dx = dx; + + parameters.lambdaRef = 1.0e-2; + + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + + auto gridFactory = GridFactory::make(); + gridFactory->setGridStrategy(Device::CPU); + gridFactory->setTriangularMeshDiscretizationMethod(TriangularMeshDiscretizationMethod::POINT_IN_OBJECT); + + auto gridBuilder = MultipleGridBuilder::makeShared(gridFactory); + + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + + if( mpiWorldSize > 1 ) + { + //if( decompositionDimension == 2 ) + //{ + gridBuilder->addCoarseGrid( rankX*LX - 0.5*L - 5.0*dx, rankY*LY - 0.5*L - 5.0*dx, rankZ*LZ - 0.5*L - 5.0*dx, + (rankX*LX+1) - 0.5*L + 5.0*dx, (rankY*LY+1) - 0.5*L + 5.0*dx, (rankZ*LZ+1) - 0.5*L + 5.0*dx, dx); + //} + + //if( decompositionDimension == 3 ) + //{ + // gridBuilder->addCoarseGrid( rankX*LX - 0.5*L - 5.0*dx, rankY*LY - 0.5*L - 5.0*dx, rankZ*LZ - 0.5*L - 5.0*dx, + // rankX*LX + 0.5*L + 5.0*dx, rankY*LY + 0.5*L + 5.0*dx, rankZ*LZ + 0.5*L + 5.0*dx, dx); + //} + + gridBuilder->setSubDomainBox( std::make_shared<BoundingBox>( rankX*LX - 0.5*L, (rankX+1)*LX - 0.5*L, + rankY*LY - 0.5*L, (rankY+1)*LY - 0.5*L, + rankZ*LZ - 0.5*L, (rankZ+1)*LZ - 0.5*L ) ); + } + else + { + gridBuilder->addCoarseGrid( -0.5*L, -0.5*L, -0.5*L, + 0.5*L, 0.5*L, 0.5*L, dx); + } + + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + + gridBuilder->setPeriodicBoundaryCondition(true, true, true); + + gridBuilder->buildGrids(GKS, false); + + MPI_Barrier(MPI_COMM_WORLD); + + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + + if( decompositionDimension == 1 && mpiWorldSize > 1 ) + { + gridBuilder->findCommunicationIndices( CommunicationDirections::PX, GKS ); + gridBuilder->setCommunicationProcess ( CommunicationDirections::PX, (rank + 1 + mpiWorldSize) % mpiWorldSize ); + + gridBuilder->findCommunicationIndices( CommunicationDirections::MX, GKS ); + gridBuilder->setCommunicationProcess ( CommunicationDirections::MX, (rank - 1 + mpiWorldSize) % mpiWorldSize ); + } + else if ( decompositionDimension == 2 && mpiWorldSize > 1 ) + { + int rankPX = ( (rankX + 1 + sideLength) % sideLength ) + rankY * sideLength; + int rankMX = ( (rankX - 1 + sideLength) % sideLength ) + rankY * sideLength; + int rankPY = rankX + ( (rankY + 1 + sideLength) % sideLength ) * sideLength; + int rankMY = rankX + ( (rankY - 1 + sideLength) % sideLength ) * sideLength; + + gridBuilder->findCommunicationIndices( CommunicationDirections::PX, GKS ); + gridBuilder->setCommunicationProcess ( CommunicationDirections::PX, rankPX); + + gridBuilder->findCommunicationIndices( CommunicationDirections::MX, GKS ); + gridBuilder->setCommunicationProcess ( CommunicationDirections::MX, rankMX); + + gridBuilder->findCommunicationIndices( CommunicationDirections::PY, GKS ); + gridBuilder->setCommunicationProcess ( CommunicationDirections::PY, rankPY); + + gridBuilder->findCommunicationIndices( CommunicationDirections::MY, GKS ); + gridBuilder->setCommunicationProcess ( CommunicationDirections::MY, rankMY); + } + else if ( decompositionDimension == 3 && mpiWorldSize > 1 ) + { + int rankPX = ( (rankX + 1 + sideLength) % sideLength ) + rankY * sideLength + rankZ * sideLength * sideLength; + int rankMX = ( (rankX - 1 + sideLength) % sideLength ) + rankY * sideLength + rankZ * sideLength * sideLength; + int rankPY = rankX + ( (rankY + 1 + sideLength) % sideLength ) * sideLength + rankZ * sideLength * sideLength; + int rankMY = rankX + ( (rankY - 1 + sideLength) % sideLength ) * sideLength + rankZ * sideLength * sideLength; + int rankPZ = rankX + rankY * sideLength + ( (rankZ + 1 + sideLength) % sideLength ) * sideLength * sideLength; + int rankMZ = rankX + rankY * sideLength + ( (rankZ - 1 + sideLength) % sideLength ) * sideLength * sideLength; + + gridBuilder->findCommunicationIndices( CommunicationDirections::PX, GKS ); + gridBuilder->setCommunicationProcess ( CommunicationDirections::PX, rankPX); + + gridBuilder->findCommunicationIndices( CommunicationDirections::MX, GKS ); + gridBuilder->setCommunicationProcess ( CommunicationDirections::MX, rankMX); + + gridBuilder->findCommunicationIndices( CommunicationDirections::PY, GKS ); + gridBuilder->setCommunicationProcess ( CommunicationDirections::PY, rankPY); + + gridBuilder->findCommunicationIndices( CommunicationDirections::MY, GKS ); + gridBuilder->setCommunicationProcess ( CommunicationDirections::MY, rankMY); + + gridBuilder->findCommunicationIndices( CommunicationDirections::PZ, GKS ); + gridBuilder->setCommunicationProcess ( CommunicationDirections::PZ, rankPZ); + + gridBuilder->findCommunicationIndices( CommunicationDirections::MZ, GKS ); + gridBuilder->setCommunicationProcess ( CommunicationDirections::MZ, rankMZ); + + *logging::out << logging::Logger::INFO_HIGH << rankPX << " " << rankMX << " " << rankPY << " " << rankMY << "\n"; + } + + gridBuilder->writeGridsToVtk(path + "/Grid_rank_" + std::to_string(rank) + "_lev_"); + + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + + GksMeshAdapter meshAdapter( gridBuilder ); + + meshAdapter.inputGrid(); + + meshAdapter.getCommunicationIndices(); + + meshAdapter.findPeriodicBoundaryNeighbors(); + + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + + auto dataBase = std::make_shared<DataBase>( "GPU" ); + + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + + SPtr<BoundaryCondition> bcMX = std::make_shared<Periodic>( dataBase ); + SPtr<BoundaryCondition> bcPX = std::make_shared<Periodic>( dataBase ); + //SPtr<BoundaryCondition> bcMX = std::make_shared<AdiabaticWall>( dataBase, Vec3(0.0, 0.0, 0.0), false ); + //SPtr<BoundaryCondition> bcPX = std::make_shared<AdiabaticWall>( dataBase, Vec3(0.0, 0.1, 0.0), false ); + + bcMX->findBoundaryCells( meshAdapter, true, [&](Vec3 center){ return center.x < -0.5*L; } ); + bcPX->findBoundaryCells( meshAdapter, true, [&](Vec3 center){ return center.x > 0.5*L; } ); + + ////////////////////////////////////////////////////////////////////////// + + //SPtr<BoundaryCondition> bcMY = std::make_shared<Periodic>( dataBase ); + //SPtr<BoundaryCondition> bcPY = std::make_shared<Periodic>( dataBase ); + SPtr<BoundaryCondition> bcMY = std::make_shared<Periodic>( dataBase ); + SPtr<BoundaryCondition> bcPY = std::make_shared<Periodic>( dataBase ); + + bcMY->findBoundaryCells( meshAdapter, true, [&](Vec3 center){ return center.y < -0.5*L; } ); + bcPY->findBoundaryCells( meshAdapter, true, [&](Vec3 center){ return center.y > 0.5*L; } ); + + ////////////////////////////////////////////////////////////////////////// + + //SPtr<BoundaryCondition> bcMZ = std::make_shared<Periodic>( dataBase ); + //SPtr<BoundaryCondition> bcPZ = std::make_shared<Periodic>( dataBase ); + SPtr<BoundaryCondition> bcMZ = std::make_shared<Periodic>( dataBase ); + SPtr<BoundaryCondition> bcPZ = std::make_shared<Periodic>( dataBase ); + + bcMZ->findBoundaryCells( meshAdapter, true, [&](Vec3 center){ return center.z < -0.5*L; } ); + bcPZ->findBoundaryCells( meshAdapter, true, [&](Vec3 center){ return center.z > 0.5*L; } ); + + ////////////////////////////////////////////////////////////////////////// + + if( mpiWorldSize == 1 ) + { + dataBase->boundaryConditions.push_back( bcMX ); + dataBase->boundaryConditions.push_back( bcPX ); + + dataBase->boundaryConditions.push_back( bcMY ); + dataBase->boundaryConditions.push_back( bcPY ); + } + if( decompositionDimension == 2 || mpiWorldSize == 1 ) + { + dataBase->boundaryConditions.push_back( bcMZ ); + dataBase->boundaryConditions.push_back( bcPZ ); + } + + ////////////////////////////////////////////////////////////////////////// + + *logging::out << logging::Logger::INFO_HIGH << "bcMX ==> " << bcMX->numberOfCellsPerLevel[0] << "\n"; + *logging::out << logging::Logger::INFO_HIGH << "bcPX ==> " << bcPX->numberOfCellsPerLevel[0] << "\n"; + + *logging::out << logging::Logger::INFO_HIGH << "bcMY ==> " << bcMY->numberOfCellsPerLevel[0] << "\n"; + *logging::out << logging::Logger::INFO_HIGH << "bcPY ==> " << bcPY->numberOfCellsPerLevel[0] << "\n"; + + *logging::out << logging::Logger::INFO_HIGH << "bcMZ ==> " << bcMZ->numberOfCellsPerLevel[0] << "\n"; + *logging::out << logging::Logger::INFO_HIGH << "bcPZ ==> " << bcPZ->numberOfCellsPerLevel[0] << "\n"; + + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + + dataBase->setMesh( meshAdapter ); + + dataBase->setCommunicators( meshAdapter ); + + //*logging::out << logging::Logger::WARNING << int(dataBase->communicators[0].size()) << "\n"; + //*logging::out << logging::Logger::WARNING << int(dataBase->communicators[0][0].get()) << "\n"; + //*logging::out << logging::Logger::WARNING << int(dataBase->communicators[0][1].get()) << "\n"; + + CudaUtility::printCudaMemoryUsage(); + + Initializer::interpret(dataBase, [&] ( Vec3 cellCenter ) -> ConservedVariables + { + return toConservedVariables( PrimitiveVariables( 1.0, 1.0, 0.0, 0.0, parameters.lambdaRef ), parameters.K ); + }); + + dataBase->copyDataHostToDevice(); + + for( auto bc : dataBase->boundaryConditions ) + for( uint level = 0; level < dataBase->numberOfLevels; level++ ) + bc->runBoundaryConditionKernel( dataBase, parameters, level ); + + Initializer::initializeDataUpdate(dataBase); + + dataBase->copyDataDeviceToHost(); + + if( rank == 0 ) writeVtkXMLParallelSummaryFile( dataBase, parameters, path + simulationName + "_0", mpiWorldSize ); + + writeVtkXML( dataBase, parameters, 0, path + simulationName + "_0" + "_rank_" + std::to_string(rank) ); + + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + + CupsAnalyzer cupsAnalyzer( dataBase, true, 30.0 ); + + ////////////////////////////////////////////////////////////////////////// + + cupsAnalyzer.start(); + + for( uint iter = 1; iter <= 10000; iter++ ) + { + TimeStepping::nestedTimeStep(dataBase, parameters, 0); + + cupsAnalyzer.run( iter ); + + if( iter % 10000 == 0 ) + { + dataBase->copyDataDeviceToHost(); + + if( rank == 0 ) writeVtkXMLParallelSummaryFile( dataBase, parameters, path + simulationName + "_" + std::to_string( iter ), mpiWorldSize ); + + writeVtkXML( dataBase, parameters, 0, path + simulationName + "_" + std::to_string( iter ) + "_rank_" + std::to_string(rank) ); + } + } + + ////////////////////////////////////////////////////////////////////////// + + dataBase->copyDataDeviceToHost(); +} + +int main( int argc, char* argv[]) +{ + ////////////////////////////////////////////////////////////////////////// + + int rank = MpiUtility::getMpiRankBeforeInit(); + int mpiWorldSize = MpiUtility::getMpiWorldSizeBeforeInit(); + + ////////////////////////////////////////////////////////////////////////// + +#ifdef _WIN32 + std::string path( "F:/Work/Computations/out/MultiGPU/" ); +#else + //std::string path( "/home/stephan/Computations/out/" ); + std::string path( "out/" ); +#endif + + std::string simulationName ( "MultiGPU_np_" + std::to_string(mpiWorldSize) ); + + ////////////////////////////////////////////////////////////////////////// + + bool strongScaling = false; + uint nx = 64; + + uint decompositionDimension = 2; + + if( argc > 1 ) path += argv[1]; path += "/"; + if( argc > 2 ) nx = atoi( argv[2] ); + if( argc > 3 ) strongScaling = true; + + ////////////////////////////////////////////////////////////////////////// + + logging::Logger::addStream(&std::cout); + + std::ofstream logFile( path + simulationName + "_rank_" + std::to_string(rank) + ".log" ); + logging::Logger::addStream(&logFile); + + logging::Logger::setDebugLevel(logging::Logger::Level::INFO_LOW); + logging::Logger::timeStamp(logging::Logger::ENABLE); + + ////////////////////////////////////////////////////////////////////////// + + // Important: for Cuda-Aware MPI the device must be set before MPI_Init() + int deviceCount = CudaUtility::getCudaDeviceCount(); + + if(deviceCount == 0) + { + std::stringstream msg; + msg << "No devices devices found!" << std::endl; + *logging::out << logging::Logger::WARNING << msg.str(); msg.str(""); + } + + CudaUtility::setCudaDevice( rank % deviceCount ); + + ////////////////////////////////////////////////////////////////////////// + + MPI_Init(&argc, &argv); + + ////////////////////////////////////////////////////////////////////////// + + if( sizeof(real) == 4 ) + *logging::out << logging::Logger::INFO_HIGH << "Using Single Precison\n"; + else + *logging::out << logging::Logger::INFO_HIGH << "Using Double Precision\n"; + + ////////////////////////////////////////////////////////////////////////// + ////////////////////////////////////////////////////////////////////////// + ////////////////////////////////////////////////////////////////////////// + + try + { + performanceTest( path, simulationName, decompositionDimension, nx, strongScaling ); + } + catch (const std::exception& e) + { + *logging::out << logging::Logger::ERROR << e.what() << "\n"; + } + catch (const std::bad_alloc& e) + { + *logging::out << logging::Logger::ERROR << "Bad Alloc:" << e.what() << "\n"; + } + catch (...) + { + *logging::out << logging::Logger::ERROR << "Unknown exception!\n"; + } + + ////////////////////////////////////////////////////////////////////////// + ////////////////////////////////////////////////////////////////////////// + ////////////////////////////////////////////////////////////////////////// + + logFile.close(); + + MPI_Finalize(); + + return 0; +} diff --git a/targets/apps/GKS/MultiGPU_2D/package.include b/targets/apps/GKS/MultiGPU_2D/package.include new file mode 100644 index 000000000..e69de29bb diff --git a/targets/apps/GKS/PoolFire/PoolFire.cpp b/targets/apps/GKS/PoolFire/PoolFire.cpp index 78f6b796e..a6abbe068 100644 --- a/targets/apps/GKS/PoolFire/PoolFire.cpp +++ b/targets/apps/GKS/PoolFire/PoolFire.cpp @@ -141,7 +141,7 @@ void thermalCavity( std::string path, std::string simulationName, uint restartIt //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - bool threeDimensional = true; + bool threeDimensional = false; if( threeDimensional ) { @@ -409,7 +409,7 @@ void thermalCavity( std::string path, std::string simulationName, uint restartIt if( //( iter >= 100 && iter % 10 == 0 ) || - ( iter % 10000 == 0 ) + ( iter % 400 == 0 ) ) { dataBase->copyDataDeviceToHost(); @@ -417,7 +417,7 @@ void thermalCavity( std::string path, std::string simulationName, uint restartIt writeVtkXML( dataBase, parameters, 0, path + simulationName + "_" + std::to_string( iter ) ); } - if( iter % 10000 == 0 ) + if( iter % 4000 == 0 ) { Restart::writeRestart( dataBase, path + simulationName + "_" + std::to_string( iter ), iter ); } -- GitLab