From b6fddddaa056b83297b451e38084155b1fa22ac8 Mon Sep 17 00:00:00 2001 From: "LEGOLAS\\lenz" <lenz@irmb.tu-bs.de> Date: Thu, 18 Jul 2019 16:43:39 +0200 Subject: [PATCH] clean up of communication hiding --- CMakeLists.txt | 2 +- .../BoundaryConditions/AdiabaticWall.cu | 2 +- src/GksGpu/BoundaryConditions/Periodic.cu | 2 - src/GksGpu/Communication/Communicator.cpp | 57 +-- src/GksGpu/Communication/Communicator.cu | 4 - src/GksGpu/Communication/Communicator.h | 13 +- src/GksGpu/CudaUtility/CudaUtility.cpp | 9 +- src/GksGpu/CudaUtility/CudaUtility.h | 2 - src/GksGpu/DataBase/DataBaseAllocatorGPU.cpp | 12 +- src/GksGpu/TimeStepping/NestedTimeStep.cpp | 56 +-- targets/apps/GKS/MultiGPU/MultiGPU.cpp | 6 +- targets/apps/GKS/MultiGPU_2D/MultiGPU_2D.cpp | 328 +++++++++--------- 12 files changed, 218 insertions(+), 275 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 484ce62de..37aa443bc 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -180,7 +180,7 @@ IF (HULC.BUILD_VF_GKS) #add_subdirectory(targets/apps/GKS/Candle) add_subdirectory(targets/apps/GKS/MultiGPU) - #add_subdirectory(targets/apps/GKS/MultiGPU_2D) + add_subdirectory(targets/apps/GKS/MultiGPU_2D) ELSE() MESSAGE( STATUS "exclude Virtual Fluids GKS." 
) ENDIF() diff --git a/src/GksGpu/BoundaryConditions/AdiabaticWall.cu b/src/GksGpu/BoundaryConditions/AdiabaticWall.cu index eff77e7fd..642dcbe80 100644 --- a/src/GksGpu/BoundaryConditions/AdiabaticWall.cu +++ b/src/GksGpu/BoundaryConditions/AdiabaticWall.cu @@ -54,7 +54,7 @@ void AdiabaticWall::runBoundaryConditionKernel(const SPtr<DataBase> dataBase, parameters, this->startOfCellsPerLevel[ level ] ); - cudaDeviceSynchronize(); + //cudaDeviceSynchronize(); getLastCudaError("AdiabaticWall::runBoundaryConditionKernel( const SPtr<DataBase> dataBase, const Parameters parameters, const uint level )"); } diff --git a/src/GksGpu/BoundaryConditions/Periodic.cu b/src/GksGpu/BoundaryConditions/Periodic.cu index 46ce82874..802054854 100644 --- a/src/GksGpu/BoundaryConditions/Periodic.cu +++ b/src/GksGpu/BoundaryConditions/Periodic.cu @@ -56,8 +56,6 @@ void Periodic::runBoundaryConditionKernel(const SPtr<DataBase> dataBase, parameters, this->startOfCellsPerLevel[ level ] ); - cudaDeviceSynchronize(); - getLastCudaError("Periodic::runBoundaryConditionKernel( const SPtr<DataBase> dataBase, const Parameters parameters, const uint level )"); } diff --git a/src/GksGpu/Communication/Communicator.cpp b/src/GksGpu/Communication/Communicator.cpp index bd5d4e31a..1e4d07f33 100644 --- a/src/GksGpu/Communication/Communicator.cpp +++ b/src/GksGpu/Communication/Communicator.cpp @@ -22,6 +22,9 @@ #include "CudaUtility/CudaUtility.h" +int Communicator::tagSendPositive = 0; +int Communicator::tagSendNegative = 1; + Communicator::Communicator( SPtr<DataBase> dataBase ) : myAllocator ( dataBase->myAllocator ) { @@ -51,84 +54,42 @@ void Communicator::initialize(GksMeshAdapter & adapter, uint level, uint directi this->opposingRank = adapter.communicationProcesses[direction]; } -void Communicator::exchangeData( SPtr<DataBase> dataBase ) +void Communicator::sendData( SPtr<DataBase> dataBase, int tag ) { - int rank; - MPI_Comm_rank(MPI_COMM_WORLD, &rank); - - MPI_Wait( &this->sendBufferIsReady, 
MPI_STATUSES_IGNORE ); - #ifdef USE_CUDA_AWARE_MPI this->copyFromMeshToSendBuffer( dataBase ); - MPI_Isend( this->sendBuffer, this->numberOfSendNodes * LENGTH_CELL_DATA, MPI_TYPE_GPU, this->opposingRank, 0, MPI_COMM_WORLD, &this->sendBufferIsReady ); - - MPI_Recv ( this->recvBuffer, this->numberOfRecvNodes * LENGTH_CELL_DATA, MPI_TYPE_GPU, this->opposingRank, 0, MPI_COMM_WORLD, MPI_STATUSES_IGNORE ); - - this->copyFromRecvBufferToMesh( dataBase ); + MPI_Isend( this->sendBuffer, this->numberOfSendNodes * LENGTH_CELL_DATA, MPI_TYPE_GPU, this->opposingRank, tag, MPI_COMM_WORLD, &this->sendBufferIsReady ); #else // USE_CUDA_AWARE_MPI this->copyFromMeshToSendBuffer( dataBase ); - - this->myAllocator->copyBuffersDeviceToHost( shared_from_this() ); - - MPI_Isend( this->sendBufferHost, this->numberOfSendNodes * LENGTH_CELL_DATA, MPI_TYPE_GPU, this->opposingRank, 0, MPI_COMM_WORLD, &this->sendBufferIsReady ); - - MPI_Recv ( this->recvBufferHost, this->numberOfRecvNodes * LENGTH_CELL_DATA, MPI_TYPE_GPU, this->opposingRank, 0, MPI_COMM_WORLD, MPI_STATUSES_IGNORE ); - - this->myAllocator->copyBuffersHostToDevice( shared_from_this() ); - - this->copyFromRecvBufferToMesh( dataBase ); - -#endif // USE_CUDA_AWARE_MPI -} - -void Communicator::sendData( SPtr<DataBase> dataBase ) -{ -#ifdef USE_CUDA_AWARE_MPI - - this->copyFromMeshToSendBuffer( dataBase ); - - MPI_Isend( this->sendBuffer, this->numberOfSendNodes * LENGTH_CELL_DATA, MPI_TYPE_GPU, this->opposingRank, 0, MPI_COMM_WORLD, &this->sendBufferIsReady ); - -#else // USE_CUDA_AWARE_MPI - - this->copyFromMeshToSendBuffer( dataBase ); - - //CudaUtility::synchronizeCudaStream( CudaUtility::communicationStream ); this->myAllocator->copyBuffersDeviceToHost( shared_from_this() ); - - //CudaUtility::synchronizeCudaStream( CudaUtility::copyDeviceToHostStream ); CudaUtility::synchronizeCudaStream( CudaUtility::communicationStream ); - MPI_Isend( this->sendBufferHost, this->numberOfSendNodes * LENGTH_CELL_DATA, MPI_TYPE_GPU, 
this->opposingRank, 0, MPI_COMM_WORLD, &this->sendBufferIsReady ); + MPI_Isend( this->sendBufferHost, this->numberOfSendNodes * LENGTH_CELL_DATA, MPI_TYPE_GPU, this->opposingRank, tag, MPI_COMM_WORLD, &this->sendBufferIsReady ); #endif // USE_CUDA_AWARE_MPI } -void Communicator::recvData( SPtr<DataBase> dataBase ) +void Communicator::recvData( SPtr<DataBase> dataBase, int tag ) { #ifdef USE_CUDA_AWARE_MPI - MPI_Recv ( this->recvBuffer, this->numberOfRecvNodes * LENGTH_CELL_DATA, MPI_TYPE_GPU, this->opposingRank, 0, MPI_COMM_WORLD, MPI_STATUSES_IGNORE ); + MPI_Recv ( this->recvBuffer, this->numberOfRecvNodes * LENGTH_CELL_DATA, MPI_TYPE_GPU, this->opposingRank, tag, MPI_COMM_WORLD, MPI_STATUSES_IGNORE ); this->copyFromRecvBufferToMesh( dataBase ); #else // USE_CUDA_AWARE_MPI - MPI_Recv ( this->recvBufferHost, this->numberOfRecvNodes * LENGTH_CELL_DATA, MPI_TYPE_GPU, this->opposingRank, 0, MPI_COMM_WORLD, MPI_STATUSES_IGNORE ); + MPI_Recv ( this->recvBufferHost, this->numberOfRecvNodes * LENGTH_CELL_DATA, MPI_TYPE_GPU, this->opposingRank, tag, MPI_COMM_WORLD, MPI_STATUSES_IGNORE ); this->myAllocator->copyBuffersHostToDevice( shared_from_this() ); - - //CudaUtility::synchronizeCudaStream( CudaUtility::copyHostToDeviceStream ); this->copyFromRecvBufferToMesh( dataBase ); - - //CudaUtility::synchronizeCudaStream( CudaUtility::communicationStream ); #endif // USE_CUDA_AWARE_MPI } diff --git a/src/GksGpu/Communication/Communicator.cu b/src/GksGpu/Communication/Communicator.cu index cd285cb7b..bf2ca31af 100644 --- a/src/GksGpu/Communication/Communicator.cu +++ b/src/GksGpu/Communication/Communicator.cu @@ -70,8 +70,6 @@ void Communicator::copyFromMeshToSendBuffer(const SPtr<DataBase> dataBase) this->sendIndices, this->sendBuffer, 0 ); - - //CudaUtility::synchronizeCudaStream( CudaUtility::communicationStream ); } void Communicator::copyFromRecvBufferToMesh(const SPtr<DataBase> dataBase) @@ -86,8 +84,6 @@ void Communicator::copyFromRecvBufferToMesh(const SPtr<DataBase> 
dataBase) this->recvIndices, this->recvBuffer, 0 ); - - //CudaUtility::synchronizeCudaStream( CudaUtility::communicationStream ); } //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/src/GksGpu/Communication/Communicator.h b/src/GksGpu/Communication/Communicator.h index fb11607cc..7434f121f 100644 --- a/src/GksGpu/Communication/Communicator.h +++ b/src/GksGpu/Communication/Communicator.h @@ -29,28 +29,29 @@ struct VF_PUBLIC Communicator : public std::enable_shared_from_this<Communicator real* sendBuffer; // device real* recvBuffer; // device - real* sendBufferHost; - real* recvBufferHost; + real* sendBufferHost; // pinned memory + real* recvBufferHost; // pinned memory uint rank; uint opposingRank; MPI_Request sendBufferIsReady; + static int tagSendPositive; + static int tagSendNegative; + ////////////////////////////////////////////////////////////////////////// Communicator( SPtr<DataBase> dataBase ); void initialize( GksMeshAdapter& adapter, uint level, uint direction ); - void exchangeData( SPtr<DataBase> dataBase ); - void copyFromMeshToSendBuffer( SPtr<DataBase> dataBase ); void copyFromRecvBufferToMesh( SPtr<DataBase> dataBase ); - void sendData( SPtr<DataBase> dataBase ); - void recvData( SPtr<DataBase> dataBase ); + void sendData( SPtr<DataBase> dataBase, int tag ); + void recvData( SPtr<DataBase> dataBase, int tag ); }; #endif diff --git a/src/GksGpu/CudaUtility/CudaUtility.cpp b/src/GksGpu/CudaUtility/CudaUtility.cpp index e86156549..d77c928a8 100644 --- a/src/GksGpu/CudaUtility/CudaUtility.cpp +++ b/src/GksGpu/CudaUtility/CudaUtility.cpp @@ -10,8 +10,6 @@ cudaStream_t CudaUtility::computeStream = nullptr; cudaStream_t CudaUtility::communicationStream = nullptr; -cudaStream_t CudaUtility::copyDeviceToHostStream = nullptr; -cudaStream_t CudaUtility::copyHostToDeviceStream = nullptr; CudaUtility::CudaGrid::CudaGrid( uint numberOfEntities, uint 
threadsPerBlock, cudaStream_t stream ) { @@ -62,11 +60,12 @@ void CudaUtility::setCudaDevice(int device) // slide 5 int priority_high, priority_low; cudaDeviceGetStreamPriorityRange(&priority_low , &priority_high ) ; + + // the flag needs to be cudaStreamDefault to ensure synchronization with default stream + //cudaStreamCreateWithPriority (&communicationStream, cudaStreamDefault, priority_high ); + //cudaStreamCreateWithPriority (&computeStream , cudaStreamDefault, priority_low ); cudaStreamCreateWithPriority (&communicationStream, cudaStreamNonBlocking, priority_high ); cudaStreamCreateWithPriority (&computeStream , cudaStreamNonBlocking, priority_low ); - - cudaStreamCreate(©DeviceToHostStream); - cudaStreamCreate(©HostToDeviceStream); } int CudaUtility::getCudaDevice() diff --git a/src/GksGpu/CudaUtility/CudaUtility.h b/src/GksGpu/CudaUtility/CudaUtility.h index 9fac2a40a..716688d79 100644 --- a/src/GksGpu/CudaUtility/CudaUtility.h +++ b/src/GksGpu/CudaUtility/CudaUtility.h @@ -26,8 +26,6 @@ public: static cudaStream_t computeStream; static cudaStream_t communicationStream; - static cudaStream_t copyDeviceToHostStream; - static cudaStream_t copyHostToDeviceStream; static void printCudaMemoryUsage(); diff --git a/src/GksGpu/DataBase/DataBaseAllocatorGPU.cpp b/src/GksGpu/DataBase/DataBaseAllocatorGPU.cpp index 8001fc1c6..38287f412 100644 --- a/src/GksGpu/DataBase/DataBaseAllocatorGPU.cpp +++ b/src/GksGpu/DataBase/DataBaseAllocatorGPU.cpp @@ -285,18 +285,14 @@ void DataBaseAllocatorGPU::copyDataDeviceToDevice(SPtr<Communicator> dst, SPtr<C void DataBaseAllocatorGPU::copyBuffersDeviceToHost(SPtr<Communicator> communicator) { - //checkCudaErrors( cudaMemcpyAsync ( communicator->sendBufferHost, communicator->sendBuffer, LENGTH_CELL_DATA * sizeof(real) * communicator->numberOfSendNodes, cudaMemcpyDeviceToHost, CudaUtility::copyDeviceToHostStream ) ); - checkCudaErrors( cudaMemcpyAsync ( communicator->sendBufferHost, communicator->sendBuffer, LENGTH_CELL_DATA * 
sizeof(real) * communicator->numberOfSendNodes, cudaMemcpyDeviceToHost, CudaUtility::communicationStream ) ); - - //CudaUtility::synchronizeCudaStream( CudaUtility::copyDeviceToHostStream ); + size_t size = LENGTH_CELL_DATA * sizeof(real) * communicator->numberOfSendNodes; + cudaMemcpyAsync ( communicator->sendBufferHost, communicator->sendBuffer, size, cudaMemcpyDeviceToHost, CudaUtility::communicationStream ); } void DataBaseAllocatorGPU::copyBuffersHostToDevice(SPtr<Communicator> communicator) { - //checkCudaErrors( cudaMemcpyAsync ( communicator->recvBuffer, communicator->recvBufferHost, LENGTH_CELL_DATA * sizeof(real) * communicator->numberOfRecvNodes, cudaMemcpyHostToDevice, CudaUtility::copyHostToDeviceStream ) ); - checkCudaErrors( cudaMemcpyAsync ( communicator->recvBuffer, communicator->recvBufferHost, LENGTH_CELL_DATA * sizeof(real) * communicator->numberOfRecvNodes, cudaMemcpyHostToDevice, CudaUtility::communicationStream ) ); - - //CudaUtility::synchronizeCudaStream( CudaUtility::copyHostToDeviceStream ); + size_t size = LENGTH_CELL_DATA * sizeof(real) * communicator->numberOfRecvNodes; + cudaMemcpyAsync ( communicator->recvBuffer, communicator->recvBufferHost, size, cudaMemcpyHostToDevice, CudaUtility::communicationStream ); } std::string DataBaseAllocatorGPU::getDeviceType() diff --git a/src/GksGpu/TimeStepping/NestedTimeStep.cpp b/src/GksGpu/TimeStepping/NestedTimeStep.cpp index ba4676629..6f35699d9 100644 --- a/src/GksGpu/TimeStepping/NestedTimeStep.cpp +++ b/src/GksGpu/TimeStepping/NestedTimeStep.cpp @@ -23,11 +23,6 @@ void TimeStepping::nestedTimeStep( SPtr<DataBase> dataBase, ////////////////////////////////////////////////////////////////////////// - //set different viscosity on specific levels - //if( level >= 3 ) parameters.mu = 1.0e-3; - - ////////////////////////////////////////////////////////////////////////// - if( level != dataBase->numberOfLevels - 1 ) { Interface::runFineToCoarse( dataBase, level ); @@ -42,6 +37,10 @@ void 
TimeStepping::nestedTimeStep( SPtr<DataBase> dataBase, ////////////////////////////////////////////////////////////////////////// + CudaUtility::synchronizeCudaDevice(); + + ////////////////////////////////////////////////////////////////////////// + FluxComputation::run( dataBase, parameters, level ); ////////////////////////////////////////////////////////////////////////// @@ -55,52 +54,31 @@ void TimeStepping::nestedTimeStep( SPtr<DataBase> dataBase, // ////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////// - //for( uint direction = 0; direction < 6; direction++ ) - //{ - // if( dataBase->communicators[level][direction] != nullptr ) - // { - // dataBase->communicators[level][direction]->exchangeData(dataBase); - // } - //} - ////////////////////////////////////////////////////////////////////////// - //for( uint direction = 0; direction < 6; direction++ ) - //{ - // if( dataBase->communicators[level][direction] != nullptr ) - // { - // dataBase->communicators[level][direction]->sendData(dataBase); - // } - //} - //for( uint direction = 0; direction < 6; direction++ ) - //{ - // if( dataBase->communicators[level][direction] != nullptr ) - // { - // dataBase->communicators[level][direction]->recvData(dataBase); - // } - //} + ////////////////////////////////////////////////////////////////////////// // X ////////////////////////////////////////////////////////////////////////// - if( dataBase->communicators[level][0] != nullptr ) dataBase->communicators[level][0]->sendData(dataBase); - if( dataBase->communicators[level][1] != nullptr ) dataBase->communicators[level][1]->sendData(dataBase); + if( dataBase->communicators[level][0] != nullptr ) dataBase->communicators[level][0]->sendData(dataBase, Communicator::tagSendNegative); + if( dataBase->communicators[level][1] != nullptr ) dataBase->communicators[level][1]->sendData(dataBase, Communicator::tagSendPositive); - if( 
dataBase->communicators[level][0] != nullptr ) dataBase->communicators[level][0]->recvData(dataBase); - if( dataBase->communicators[level][1] != nullptr ) dataBase->communicators[level][1]->recvData(dataBase); + if( dataBase->communicators[level][0] != nullptr ) dataBase->communicators[level][0]->recvData(dataBase, Communicator::tagSendPositive); + if( dataBase->communicators[level][1] != nullptr ) dataBase->communicators[level][1]->recvData(dataBase, Communicator::tagSendNegative); ////////////////////////////////////////////////////////////////////////// // Y ////////////////////////////////////////////////////////////////////////// - if( dataBase->communicators[level][2] != nullptr ) dataBase->communicators[level][2]->sendData(dataBase); - if( dataBase->communicators[level][3] != nullptr ) dataBase->communicators[level][3]->sendData(dataBase); + if( dataBase->communicators[level][2] != nullptr ) dataBase->communicators[level][2]->sendData(dataBase, Communicator::tagSendNegative); + if( dataBase->communicators[level][3] != nullptr ) dataBase->communicators[level][3]->sendData(dataBase, Communicator::tagSendPositive); - if( dataBase->communicators[level][2] != nullptr ) dataBase->communicators[level][2]->recvData(dataBase); - if( dataBase->communicators[level][3] != nullptr ) dataBase->communicators[level][3]->recvData(dataBase); + if( dataBase->communicators[level][2] != nullptr ) dataBase->communicators[level][2]->recvData(dataBase, Communicator::tagSendPositive); + if( dataBase->communicators[level][3] != nullptr ) dataBase->communicators[level][3]->recvData(dataBase, Communicator::tagSendNegative); ////////////////////////////////////////////////////////////////////////// // Z ////////////////////////////////////////////////////////////////////////// - if( dataBase->communicators[level][4] != nullptr ) dataBase->communicators[level][4]->sendData(dataBase); - if( dataBase->communicators[level][5] != nullptr ) 
dataBase->communicators[level][5]->sendData(dataBase); + if( dataBase->communicators[level][4] != nullptr ) dataBase->communicators[level][4]->sendData(dataBase, Communicator::tagSendNegative); + if( dataBase->communicators[level][5] != nullptr ) dataBase->communicators[level][5]->sendData(dataBase, Communicator::tagSendPositive); - if( dataBase->communicators[level][4] != nullptr ) dataBase->communicators[level][4]->recvData(dataBase); - if( dataBase->communicators[level][5] != nullptr ) dataBase->communicators[level][5]->recvData(dataBase); + if( dataBase->communicators[level][4] != nullptr ) dataBase->communicators[level][4]->recvData(dataBase, Communicator::tagSendPositive); + if( dataBase->communicators[level][5] != nullptr ) dataBase->communicators[level][5]->recvData(dataBase, Communicator::tagSendNegative); } ////////////////////////////////////////////////////////////////////////// diff --git a/targets/apps/GKS/MultiGPU/MultiGPU.cpp b/targets/apps/GKS/MultiGPU/MultiGPU.cpp index d54356812..38fcdbad5 100644 --- a/targets/apps/GKS/MultiGPU/MultiGPU.cpp +++ b/targets/apps/GKS/MultiGPU/MultiGPU.cpp @@ -261,7 +261,7 @@ void performanceTest( std::string path, std::string simulationName, uint decompo //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - const uint numberOfIterations = 10; + const uint numberOfIterations = 1000; CupsAnalyzer cupsAnalyzer( dataBase, true, 30.0, true, numberOfIterations ); @@ -324,7 +324,7 @@ int main( int argc, char* argv[]) ////////////////////////////////////////////////////////////////////////// bool strongScaling = false; - uint nx = 64; + uint nx = 128; if( argc > 1 ) path += argv[1]; path += "/"; if( argc > 2 ) nx = atoi( argv[2] ); @@ -363,7 +363,7 @@ int main( int argc, char* 
argv[]) ////////////////////////////////////////////////////////////////////////// if( sizeof(real) == 4 ) - *logging::out << logging::Logger::INFO_HIGH << "Using Single Precison\n"; + *logging::out << logging::Logger::INFO_HIGH << "Using Single Precision\n"; else *logging::out << logging::Logger::INFO_HIGH << "Using Double Precision\n"; diff --git a/targets/apps/GKS/MultiGPU_2D/MultiGPU_2D.cpp b/targets/apps/GKS/MultiGPU_2D/MultiGPU_2D.cpp index 0288c6e50..6d5cdd57f 100644 --- a/targets/apps/GKS/MultiGPU_2D/MultiGPU_2D.cpp +++ b/targets/apps/GKS/MultiGPU_2D/MultiGPU_2D.cpp @@ -70,29 +70,50 @@ void performanceTest( std::string path, std::string simulationName, uint decompo //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - int sideLength, rankX, rankY, rankZ; + int sideLengthX, sideLengthY, sideLengthZ, rankX, rankY, rankZ; - if( decompositionDimension == 2 ) + if( decompositionDimension == 1 ) { - if (mpiWorldSize == 1) sideLength = 1; - else if (mpiWorldSize == 4) sideLength = 2; - else if (mpiWorldSize == 16) sideLength = 4; - - rankX = rank % sideLength; - rankY = rank / sideLength; + if (mpiWorldSize == 1 ) { sideLengthX = 1 ; sideLengthY = 1; sideLengthZ = 1; } + else if (mpiWorldSize == 2 ) { sideLengthX = 2 ; sideLengthY = 1; sideLengthZ = 1; } + else if (mpiWorldSize == 4 ) { sideLengthX = 4 ; sideLengthY = 1; sideLengthZ = 1; } + else if (mpiWorldSize == 8 ) { sideLengthX = 8 ; sideLengthY = 1; sideLengthZ = 1; } + else if (mpiWorldSize == 16) { sideLengthX = 16; sideLengthY = 1; sideLengthZ = 1; } + else if (mpiWorldSize == 32) { sideLengthX = 32; sideLengthY = 1; sideLengthZ = 1; } + + rankX = rank; + rankY = 0; + rankZ = 0; + } + else if( decompositionDimension == 2 ) + { + if (mpiWorldSize == 1 ) { sideLengthX = 1; sideLengthY = 1; sideLengthZ = 1; } + else if (mpiWorldSize == 2 ) { sideLengthX = 2; sideLengthY = 1; sideLengthZ = 1; } + else if 
(mpiWorldSize == 4 ) { sideLengthX = 2; sideLengthY = 2; sideLengthZ = 1; } + else if (mpiWorldSize == 8 ) { sideLengthX = 4; sideLengthY = 2; sideLengthZ = 1; } + else if (mpiWorldSize == 16) { sideLengthX = 4; sideLengthY = 4; sideLengthZ = 1; } + else if (mpiWorldSize == 32) { sideLengthX = 8; sideLengthY = 4; sideLengthZ = 1; } + + rankX = rank % sideLengthX; + rankY = rank / sideLengthX; rankZ = 0; } else if( decompositionDimension == 3 ) { - if (mpiWorldSize == 1) sideLength = 1; - else if (mpiWorldSize == 8) sideLength = 2; - - rankX = rank % sideLength; - rankY = ( rank % ( sideLength * sideLength ) ) / sideLength; - rankZ = rank / ( sideLength * sideLength ); + if (mpiWorldSize == 1 ) { sideLengthX = 1; sideLengthY = 1; sideLengthZ = 1; } + else if (mpiWorldSize == 2 ) { sideLengthX = 2; sideLengthY = 1; sideLengthZ = 1; } + else if (mpiWorldSize == 4 ) { sideLengthX = 2; sideLengthY = 2; sideLengthZ = 1; } + else if (mpiWorldSize == 8 ) { sideLengthX = 2; sideLengthY = 2; sideLengthZ = 2; } + else if (mpiWorldSize == 16) { sideLengthX = 4; sideLengthY = 2; sideLengthZ = 2; } + else if (mpiWorldSize == 32) { sideLengthX = 4; sideLengthY = 4; sideLengthZ = 2; } + + rankX = rank % sideLengthX; + rankY = ( rank % ( sideLengthX * sideLengthY ) ) / sideLengthX; + rankZ = rank / ( sideLengthY * sideLengthX ); } - *logging::out << logging::Logger::INFO_HIGH << sideLength << " " << rankX << " " << rankY << " " << rankZ << "\n"; + *logging::out << logging::Logger::INFO_HIGH << "SideLength = " << sideLengthX << " " << sideLengthY << " " << sideLengthZ << "\n"; + *logging::out << logging::Logger::INFO_HIGH << "rank = " << rankX << " " << rankY << " " << rankZ << "\n"; //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// @@ -106,16 +127,20 @@ void performanceTest( std::string path, std::string simulationName, uint decompo if( strongScaling ) { - if( decompositionDimension 
== 2 ) + if( decompositionDimension == 1 ) { - LX /= double(sideLength); - LY /= double(sideLength); + LX /= double(sideLengthX); + } + else if( decompositionDimension == 2 ) + { + LX /= double(sideLengthX); + LY /= double(sideLengthY); } else if( decompositionDimension == 3 ) { - LX /= double(sideLength); - LY /= double(sideLength); - LZ /= double(sideLength); + LX /= double(sideLengthX); + LY /= double(sideLengthY); + LZ /= double(sideLengthZ); } } @@ -127,7 +152,7 @@ void performanceTest( std::string path, std::string simulationName, uint decompo parameters.Pr = 1; parameters.mu = 0.01; - parameters.force.x = 0.1; + parameters.force.x = 0; parameters.force.y = 0; parameters.force.z = 0; @@ -146,98 +171,60 @@ void performanceTest( std::string path, std::string simulationName, uint decompo //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - if( mpiWorldSize > 1 ) - { - //if( decompositionDimension == 2 ) - //{ - gridBuilder->addCoarseGrid( rankX*LX - 0.5*L - 5.0*dx, rankY*LY - 0.5*L - 5.0*dx, rankZ*LZ - 0.5*L - 5.0*dx, - (rankX*LX+1) - 0.5*L + 5.0*dx, (rankY*LY+1) - 0.5*L + 5.0*dx, (rankZ*LZ+1) - 0.5*L + 5.0*dx, dx); - //} - - //if( decompositionDimension == 3 ) - //{ - // gridBuilder->addCoarseGrid( rankX*LX - 0.5*L - 5.0*dx, rankY*LY - 0.5*L - 5.0*dx, rankZ*LZ - 0.5*L - 5.0*dx, - // rankX*LX + 0.5*L + 5.0*dx, rankY*LY + 0.5*L + 5.0*dx, rankZ*LZ + 0.5*L + 5.0*dx, dx); - //} - - gridBuilder->setSubDomainBox( std::make_shared<BoundingBox>( rankX*LX - 0.5*L, (rankX+1)*LX - 0.5*L, - rankY*LY - 0.5*L, (rankY+1)*LY - 0.5*L, - rankZ*LZ - 0.5*L, (rankZ+1)*LZ - 0.5*L ) ); - } - else - { - gridBuilder->addCoarseGrid( -0.5*L, -0.5*L, -0.5*L, - 0.5*L, 0.5*L, 0.5*L, dx); - } - - //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - - gridBuilder->setPeriodicBoundaryCondition(true, 
true, true); + real xOverlap = ( sideLengthX == 1 ) ? 0.0 : 5.0*dx; + real yOverlap = ( sideLengthY == 1 ) ? 0.0 : 5.0*dx; + real zOverlap = ( sideLengthZ == 1 ) ? 0.0 : 5.0*dx; - gridBuilder->buildGrids(GKS, false); + gridBuilder->addCoarseGrid( rankX*LX - 0.5*L - xOverlap, rankY*LY - 0.5*L - yOverlap, rankZ*LZ - 0.5*L - zOverlap, + (rankX*LX+1) - 0.5*L + xOverlap, (rankY*LY+1) - 0.5*L + yOverlap, (rankZ*LZ+1) - 0.5*L + zOverlap, dx); - MPI_Barrier(MPI_COMM_WORLD); + gridBuilder->setSubDomainBox( std::make_shared<BoundingBox>( rankX*LX - 0.5*L, (rankX+1)*LX - 0.5*L, + rankY*LY - 0.5*L, (rankY+1)*LY - 0.5*L, + rankZ*LZ - 0.5*L, (rankZ+1)*LZ - 0.5*L ) ); //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - if( decompositionDimension == 1 && mpiWorldSize > 1 ) - { - gridBuilder->findCommunicationIndices( CommunicationDirections::PX, GKS ); - gridBuilder->setCommunicationProcess ( CommunicationDirections::PX, (rank + 1 + mpiWorldSize) % mpiWorldSize ); + gridBuilder->setPeriodicBoundaryCondition(sideLengthX == 1, sideLengthY == 1, sideLengthZ == 1); - gridBuilder->findCommunicationIndices( CommunicationDirections::MX, GKS ); - gridBuilder->setCommunicationProcess ( CommunicationDirections::MX, (rank - 1 + mpiWorldSize) % mpiWorldSize ); - } - else if ( decompositionDimension == 2 && mpiWorldSize > 1 ) - { - int rankPX = ( (rankX + 1 + sideLength) % sideLength ) + rankY * sideLength; - int rankMX = ( (rankX - 1 + sideLength) % sideLength ) + rankY * sideLength; - int rankPY = rankX + ( (rankY + 1 + sideLength) % sideLength ) * sideLength; - int rankMY = rankX + ( (rankY - 1 + sideLength) % sideLength ) * sideLength; + *logging::out << logging::Logger::INFO_HIGH << "periodicity = " << (sideLengthX == 1) << " " << (sideLengthY == 1) << " " << (sideLengthZ == 1) << "\n"; - gridBuilder->findCommunicationIndices( CommunicationDirections::PX, GKS ); - 
gridBuilder->setCommunicationProcess ( CommunicationDirections::PX, rankPX); + gridBuilder->buildGrids(GKS, false); - gridBuilder->findCommunicationIndices( CommunicationDirections::MX, GKS ); - gridBuilder->setCommunicationProcess ( CommunicationDirections::MX, rankMX); + MPI_Barrier(MPI_COMM_WORLD); - gridBuilder->findCommunicationIndices( CommunicationDirections::PY, GKS ); - gridBuilder->setCommunicationProcess ( CommunicationDirections::PY, rankPY); + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - gridBuilder->findCommunicationIndices( CommunicationDirections::MY, GKS ); - gridBuilder->setCommunicationProcess ( CommunicationDirections::MY, rankMY); - } - else if ( decompositionDimension == 3 && mpiWorldSize > 1 ) + if( mpiWorldSize > 1 ) { - int rankPX = ( (rankX + 1 + sideLength) % sideLength ) + rankY * sideLength + rankZ * sideLength * sideLength; - int rankMX = ( (rankX - 1 + sideLength) % sideLength ) + rankY * sideLength + rankZ * sideLength * sideLength; - int rankPY = rankX + ( (rankY + 1 + sideLength) % sideLength ) * sideLength + rankZ * sideLength * sideLength; - int rankMY = rankX + ( (rankY - 1 + sideLength) % sideLength ) * sideLength + rankZ * sideLength * sideLength; - int rankPZ = rankX + rankY * sideLength + ( (rankZ + 1 + sideLength) % sideLength ) * sideLength * sideLength; - int rankMZ = rankX + rankY * sideLength + ( (rankZ - 1 + sideLength) % sideLength ) * sideLength * sideLength; + int rankPX = ( (rankX + 1 + sideLengthX) % sideLengthX ) + rankY * sideLengthX + rankZ * sideLengthX * sideLengthY; + int rankMX = ( (rankX - 1 + sideLengthX) % sideLengthX ) + rankY * sideLengthX + rankZ * sideLengthX * sideLengthY; + int rankPY = rankX + ( (rankY + 1 + sideLengthY) % sideLengthY ) * sideLengthX + rankZ * sideLengthX * sideLengthY; + int rankMY = rankX + ( (rankY - 1 + sideLengthY) % sideLengthY ) * sideLengthX + rankZ * sideLengthX * 
sideLengthY; + int rankPZ = rankX + rankY * sideLengthX + ( (rankZ + 1 + sideLengthZ) % sideLengthZ ) * sideLengthX * sideLengthY; + int rankMZ = rankX + rankY * sideLengthX + ( (rankZ - 1 + sideLengthZ) % sideLengthZ ) * sideLengthX * sideLengthY; - gridBuilder->findCommunicationIndices( CommunicationDirections::PX, GKS ); - gridBuilder->setCommunicationProcess ( CommunicationDirections::PX, rankPX); + if( sideLengthX > 1 ) gridBuilder->findCommunicationIndices( CommunicationDirections::PX, GKS ); + if( sideLengthX > 1 ) gridBuilder->setCommunicationProcess ( CommunicationDirections::PX, rankPX); - gridBuilder->findCommunicationIndices( CommunicationDirections::MX, GKS ); - gridBuilder->setCommunicationProcess ( CommunicationDirections::MX, rankMX); + if( sideLengthX > 1 ) gridBuilder->findCommunicationIndices( CommunicationDirections::MX, GKS ); + if( sideLengthX > 1 ) gridBuilder->setCommunicationProcess ( CommunicationDirections::MX, rankMX); - gridBuilder->findCommunicationIndices( CommunicationDirections::PY, GKS ); - gridBuilder->setCommunicationProcess ( CommunicationDirections::PY, rankPY); + if( sideLengthY > 1 ) gridBuilder->findCommunicationIndices( CommunicationDirections::PY, GKS ); + if( sideLengthY > 1 ) gridBuilder->setCommunicationProcess ( CommunicationDirections::PY, rankPY); - gridBuilder->findCommunicationIndices( CommunicationDirections::MY, GKS ); - gridBuilder->setCommunicationProcess ( CommunicationDirections::MY, rankMY); + if( sideLengthY > 1 ) gridBuilder->findCommunicationIndices( CommunicationDirections::MY, GKS ); + if( sideLengthY > 1 ) gridBuilder->setCommunicationProcess ( CommunicationDirections::MY, rankMY); - gridBuilder->findCommunicationIndices( CommunicationDirections::PZ, GKS ); - gridBuilder->setCommunicationProcess ( CommunicationDirections::PZ, rankPZ); + if( sideLengthZ > 1 ) gridBuilder->findCommunicationIndices( CommunicationDirections::PZ, GKS ); + if( sideLengthZ > 1 ) gridBuilder->setCommunicationProcess ( 
CommunicationDirections::PZ, rankPZ); - gridBuilder->findCommunicationIndices( CommunicationDirections::MZ, GKS ); - gridBuilder->setCommunicationProcess ( CommunicationDirections::MZ, rankMZ); + if( sideLengthZ > 1 ) gridBuilder->findCommunicationIndices( CommunicationDirections::MZ, GKS ); + if( sideLengthZ > 1 ) gridBuilder->setCommunicationProcess ( CommunicationDirections::MZ, rankMZ); - *logging::out << logging::Logger::INFO_HIGH << rankPX << " " << rankMX << " " << rankPY << " " << rankMY << "\n"; + *logging::out << logging::Logger::INFO_HIGH << "neighborRanks = " << rankPX << " " << rankMX << " " << rankPY << " " << rankMY << " " << rankPZ << " " << rankMZ << "\n"; } - gridBuilder->writeGridsToVtk(path + "/Grid_rank_" + std::to_string(rank) + "_lev_"); + //gridBuilder->writeGridsToVtk(path + "/Grid_rank_" + std::to_string(rank) + "_lev_"); //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// @@ -245,9 +232,7 @@ void performanceTest( std::string path, std::string simulationName, uint decompo meshAdapter.inputGrid(); - meshAdapter.getCommunicationIndices(); - - meshAdapter.findPeriodicBoundaryNeighbors(); + if( sideLengthX == 1 || sideLengthY == 1 || sideLengthZ == 1 ) meshAdapter.findPeriodicBoundaryNeighbors(); //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// @@ -260,58 +245,49 @@ void performanceTest( std::string path, std::string simulationName, uint decompo SPtr<BoundaryCondition> bcMX = std::make_shared<Periodic>( dataBase ); SPtr<BoundaryCondition> bcPX = std::make_shared<Periodic>( dataBase ); - //SPtr<BoundaryCondition> bcMX = std::make_shared<AdiabaticWall>( dataBase, Vec3(0.0, 0.0, 0.0), false ); - //SPtr<BoundaryCondition> bcPX = std::make_shared<AdiabaticWall>( dataBase, Vec3(0.0, 0.1, 0.0), false ); - bcMX->findBoundaryCells( meshAdapter, true, [&](Vec3 
center){ return center.x < -0.5*L; } ); - bcPX->findBoundaryCells( meshAdapter, true, [&](Vec3 center){ return center.x > 0.5*L; } ); + if( sideLengthX == 1 ) bcMX->findBoundaryCells( meshAdapter, true, [&](Vec3 center){ return center.x < -0.5*L; } ); + if( sideLengthX == 1 ) bcPX->findBoundaryCells( meshAdapter, true, [&](Vec3 center){ return center.x > 0.5*L; } ); ////////////////////////////////////////////////////////////////////////// - //SPtr<BoundaryCondition> bcMY = std::make_shared<Periodic>( dataBase ); - //SPtr<BoundaryCondition> bcPY = std::make_shared<Periodic>( dataBase ); SPtr<BoundaryCondition> bcMY = std::make_shared<Periodic>( dataBase ); SPtr<BoundaryCondition> bcPY = std::make_shared<Periodic>( dataBase ); - bcMY->findBoundaryCells( meshAdapter, true, [&](Vec3 center){ return center.y < -0.5*L; } ); - bcPY->findBoundaryCells( meshAdapter, true, [&](Vec3 center){ return center.y > 0.5*L; } ); + if( sideLengthY == 1 ) bcMY->findBoundaryCells( meshAdapter, true, [&](Vec3 center){ return center.y < -0.5*L; } ); + if( sideLengthY == 1 ) bcPY->findBoundaryCells( meshAdapter, true, [&](Vec3 center){ return center.y > 0.5*L; } ); ////////////////////////////////////////////////////////////////////////// - //SPtr<BoundaryCondition> bcMZ = std::make_shared<Periodic>( dataBase ); - //SPtr<BoundaryCondition> bcPZ = std::make_shared<Periodic>( dataBase ); SPtr<BoundaryCondition> bcMZ = std::make_shared<Periodic>( dataBase ); SPtr<BoundaryCondition> bcPZ = std::make_shared<Periodic>( dataBase ); - bcMZ->findBoundaryCells( meshAdapter, true, [&](Vec3 center){ return center.z < -0.5*L; } ); - bcPZ->findBoundaryCells( meshAdapter, true, [&](Vec3 center){ return center.z > 0.5*L; } ); + if( sideLengthZ == 1 ) bcMZ->findBoundaryCells( meshAdapter, true, [&](Vec3 center){ return center.z < -0.5*L; } ); + if( sideLengthZ == 1 ) bcPZ->findBoundaryCells( meshAdapter, true, [&](Vec3 center){ return center.z > 0.5*L; } ); 
////////////////////////////////////////////////////////////////////////// - if( mpiWorldSize == 1 ) - { - dataBase->boundaryConditions.push_back( bcMX ); - dataBase->boundaryConditions.push_back( bcPX ); + if( sideLengthX == 1 ) dataBase->boundaryConditions.push_back( bcMX ); + if( sideLengthX == 1 ) dataBase->boundaryConditions.push_back( bcPX ); - dataBase->boundaryConditions.push_back( bcMY ); - dataBase->boundaryConditions.push_back( bcPY ); - } - if( decompositionDimension == 2 || mpiWorldSize == 1 ) - { - dataBase->boundaryConditions.push_back( bcMZ ); - dataBase->boundaryConditions.push_back( bcPZ ); - } + if( sideLengthY == 1 ) dataBase->boundaryConditions.push_back( bcMY ); + if( sideLengthY == 1 ) dataBase->boundaryConditions.push_back( bcPY ); + + if( sideLengthZ == 1 ) dataBase->boundaryConditions.push_back( bcMZ ); + if( sideLengthZ == 1 ) dataBase->boundaryConditions.push_back( bcPZ ); ////////////////////////////////////////////////////////////////////////// - *logging::out << logging::Logger::INFO_HIGH << "bcMX ==> " << bcMX->numberOfCellsPerLevel[0] << "\n"; - *logging::out << logging::Logger::INFO_HIGH << "bcPX ==> " << bcPX->numberOfCellsPerLevel[0] << "\n"; + *logging::out << logging::Logger::INFO_HIGH << "NumberOfBoundaryConditions = " << (int)dataBase->boundaryConditions.size() << "\n"; + + if( sideLengthX == 1 ) *logging::out << logging::Logger::INFO_HIGH << "bcMX ==> " << bcMX->numberOfCellsPerLevel[0] << "\n"; + if( sideLengthX == 1 ) *logging::out << logging::Logger::INFO_HIGH << "bcPX ==> " << bcPX->numberOfCellsPerLevel[0] << "\n"; - *logging::out << logging::Logger::INFO_HIGH << "bcMY ==> " << bcMY->numberOfCellsPerLevel[0] << "\n"; - *logging::out << logging::Logger::INFO_HIGH << "bcPY ==> " << bcPY->numberOfCellsPerLevel[0] << "\n"; + if( sideLengthY == 1 ) *logging::out << logging::Logger::INFO_HIGH << "bcMY ==> " << bcMY->numberOfCellsPerLevel[0] << "\n"; + if( sideLengthY == 1 ) *logging::out << logging::Logger::INFO_HIGH << "bcPY 
==> " << bcPY->numberOfCellsPerLevel[0] << "\n"; - *logging::out << logging::Logger::INFO_HIGH << "bcMZ ==> " << bcMZ->numberOfCellsPerLevel[0] << "\n"; - *logging::out << logging::Logger::INFO_HIGH << "bcPZ ==> " << bcPZ->numberOfCellsPerLevel[0] << "\n"; + if( sideLengthZ == 1 ) *logging::out << logging::Logger::INFO_HIGH << "bcMZ ==> " << bcMZ->numberOfCellsPerLevel[0] << "\n"; + if( sideLengthZ == 1 ) *logging::out << logging::Logger::INFO_HIGH << "bcPZ ==> " << bcPZ->numberOfCellsPerLevel[0] << "\n"; //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// @@ -321,18 +297,38 @@ void performanceTest( std::string path, std::string simulationName, uint decompo dataBase->setMesh( meshAdapter ); dataBase->setCommunicators( meshAdapter ); - - //*logging::out << logging::Logger::WARNING << int(dataBase->communicators[0].size()) << "\n"; - //*logging::out << logging::Logger::WARNING << int(dataBase->communicators[0][0].get()) << "\n"; - //*logging::out << logging::Logger::WARNING << int(dataBase->communicators[0][1].get()) << "\n"; CudaUtility::printCudaMemoryUsage(); + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + Initializer::interpret(dataBase, [&] ( Vec3 cellCenter ) -> ConservedVariables { - return toConservedVariables( PrimitiveVariables( 1.0, 1.0, 0.0, 0.0, parameters.lambdaRef ), parameters.K ); + real U = 0.1; + + real ULocal = /*0.1*/ + U * sin( 2.0 * M_PI * cellCenter.x ) * cos( 2.0 * M_PI * cellCenter.y ) * cos( 2.0 * M_PI * cellCenter.z ); + real VLocal = /*0.1*/ - U * cos( 2.0 * M_PI * cellCenter.x ) * sin( 2.0 * M_PI * cellCenter.y ) * cos( 2.0 * M_PI * cellCenter.z ); + real WLocal = 0.1; + + real rho = 1.0; + + 
real p0 = 0.5 * rho / parameters.lambdaRef; + + real pLocal = p0 + rho * U * U / 16.0 * ( cos( 2.0 * M_PI * 2.0 * cellCenter.x ) + cos( 2.0 * M_PI * 2.0 * cellCenter.y ) ) * ( 2.0 + cos( 2.0 * M_PI * 2.0 * cellCenter.z ) ); + + real rhoLocal = 2.0 * pLocal * parameters.lambdaRef; + + //ULocal = cellCenter.x; + //VLocal = cellCenter.y; + //WLocal = cellCenter.z; + + //rhoLocal = rank + 1; + + return toConservedVariables( PrimitiveVariables( rhoLocal, ULocal, VLocal, WLocal, parameters.lambdaRef ), parameters.K ); }); + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + dataBase->copyDataHostToDevice(); for( auto bc : dataBase->boundaryConditions ) @@ -346,45 +342,62 @@ void performanceTest( std::string path, std::string simulationName, uint decompo if( rank == 0 ) writeVtkXMLParallelSummaryFile( dataBase, parameters, path + simulationName + "_0", mpiWorldSize ); writeVtkXML( dataBase, parameters, 0, path + simulationName + "_0" + "_rank_" + std::to_string(rank) ); - + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - CupsAnalyzer cupsAnalyzer( dataBase, true, 30.0 ); + const uint numberOfIterations = 1000; - ////////////////////////////////////////////////////////////////////////// + CupsAnalyzer cupsAnalyzer( dataBase, true, 30.0, true, numberOfIterations ); + + MPI_Barrier(MPI_COMM_WORLD); cupsAnalyzer.start(); - for( uint iter 
= 1; iter <= 10000; iter++ ) + for( uint iter = 1; iter <= numberOfIterations; iter++ ) { TimeStepping::nestedTimeStep(dataBase, parameters, 0); + } - cupsAnalyzer.run( iter ); + cupsAnalyzer.run( numberOfIterations, parameters.dt ); - if( iter % 10000 == 0 ) - { - dataBase->copyDataDeviceToHost(); + ////////////////////////////////////////////////////////////////////////// - if( rank == 0 ) writeVtkXMLParallelSummaryFile( dataBase, parameters, path + simulationName + "_" + std::to_string( iter ), mpiWorldSize ); + dataBase->copyDataDeviceToHost(); - writeVtkXML( dataBase, parameters, 0, path + simulationName + "_" + std::to_string( iter ) + "_rank_" + std::to_string(rank) ); - } - } + if( rank == 0 ) writeVtkXMLParallelSummaryFile( dataBase, parameters, path + simulationName + "_final", mpiWorldSize ); + writeVtkXML( dataBase, parameters, 0, path + simulationName + "_final_rank_" + std::to_string(rank) ); + ////////////////////////////////////////////////////////////////////////// - dataBase->copyDataDeviceToHost(); + int crashCellIndex = dataBase->getCrashCellIndex(); + if( crashCellIndex >= 0 ) + { + *logging::out << logging::Logger::LOGGER_ERROR << "=================================================\n"; + *logging::out << logging::Logger::LOGGER_ERROR << "=================================================\n"; + *logging::out << logging::Logger::LOGGER_ERROR << "============= Simulation Crashed!!! 
=============\n"; + *logging::out << logging::Logger::LOGGER_ERROR << "=================================================\n"; + *logging::out << logging::Logger::LOGGER_ERROR << "=================================================\n"; + } } int main( int argc, char* argv[]) { ////////////////////////////////////////////////////////////////////////// - + + int rank = 0; + int mpiWorldSize = 1; +#ifdef USE_CUDA_AWARE_MPI - int rank = MpiUtility::getMpiRankBeforeInit(); + rank = MpiUtility::getMpiRankBeforeInit(); - int mpiWorldSize = MpiUtility::getMpiWorldSizeBeforeInit(); + mpiWorldSize = MpiUtility::getMpiWorldSizeBeforeInit(); +#else + MPI_Init(&argc, &argv); + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + MPI_Comm_size(MPI_COMM_WORLD, &mpiWorldSize); +#endif ////////////////////////////////////////////////////////////////////////// @@ -395,21 +408,22 @@ int main( int argc, char* argv[]) std::string path( "out/" ); #endif - std::string simulationName ( "MultiGPU_np_" + std::to_string(mpiWorldSize) ); - ////////////////////////////////////////////////////////////////////////// - bool strongScaling = false; + bool strongScaling = true; uint nx = 64; + uint decompositionDimension = 3; - uint decompositionDimension = 2; - - if( argc > 1 ) path += argv[1]; path += "/"; - if( argc > 2 ) nx = atoi( argv[2] ); + if( argc > 1 ) nx = atoi( argv[1] ); + if( argc > 2 ) decompositionDimension = atoi( argv[2] ); if( argc > 3 ) strongScaling = true; ////////////////////////////////////////////////////////////////////////// + std::string simulationName ( "MultiGPU_np_" + std::to_string(mpiWorldSize) ); + + ////////////////////////////////////////////////////////////////////////// + logging::Logger::addStream(&std::cout); std::ofstream logFile( path + simulationName + "_rank_" + std::to_string(rank) + ".log" ); @@ -434,7 +448,9 @@ int main( int argc, char* argv[]) ////////////////////////////////////////////////////////////////////////// +#ifdef USE_CUDA_AWARE_MPI MPI_Init(&argc, &argv); +#endif ////////////////////////////////////////////////////////////////////////// @@ -453,15 +469,15 @@ 
int main( int argc, char* argv[]) } catch (const std::exception& e) { - *logging::out << logging::Logger::ERROR << e.what() << "\n"; + *logging::out << logging::Logger::LOGGER_ERROR << e.what() << "\n"; } catch (const std::bad_alloc& e) { - *logging::out << logging::Logger::ERROR << "Bad Alloc:" << e.what() << "\n"; + *logging::out << logging::Logger::LOGGER_ERROR << "Bad Alloc:" << e.what() << "\n"; } catch (...) { - *logging::out << logging::Logger::ERROR << "Unknown exception!\n"; + *logging::out << logging::Logger::LOGGER_ERROR << "Unknown exception!\n"; } ////////////////////////////////////////////////////////////////////////// -- GitLab