From f8794af18e1430e9653e11d14b059cc61797342d Mon Sep 17 00:00:00 2001 From: Anna Wellmann <a.wellmann@tu-bs.de> Date: Tue, 3 Jan 2023 09:26:35 +0000 Subject: [PATCH] Add timers for performance optimization --- apps/gpu/LBM/ActuatorLine/ActuatorLine.cpp | 2 +- .../LBM/ActuatorLine/configActuatorLine.txt | 6 +- .../Calculation/UpdateGrid27.cpp | 31 ++++++++++ .../Calculation/UpdateGrid27.h | 4 ++ src/gpu/VirtualFluids_GPU/Output/Timer.cpp | 7 +++ src/gpu/VirtualFluids_GPU/Output/Timer.h | 2 + .../PreCollisionInteractor/ActuatorFarm.cu | 60 ++++++++++++++++--- .../PreCollisionInteractor/ActuatorFarm.h | 3 + 8 files changed, 102 insertions(+), 13 deletions(-) diff --git a/apps/gpu/LBM/ActuatorLine/ActuatorLine.cpp b/apps/gpu/LBM/ActuatorLine/ActuatorLine.cpp index 77fa28738..8b5502329 100644 --- a/apps/gpu/LBM/ActuatorLine/ActuatorLine.cpp +++ b/apps/gpu/LBM/ActuatorLine/ActuatorLine.cpp @@ -251,7 +251,7 @@ void multipleLevel(const std::string& configPath) const real epsilon = dx*exp2(-level)*2.0; // width of gaussian smearing const real density = 1.225f; const uint nBlades = 3; - const uint nBladeNodes = 404; // passt zu auflösung von 105 Knoten + const uint nBladeNodes = reference_diameter * 4 + 3; // matches a resolution of 105 nodes (written for the former value 404; TODO confirm) VF_LOG_INFO("number of blade nodes ALM = {}", nBladeNodes); const real tipspeed_ratio = 7.5f; // tipspeed ratio = angular vel * radius / inflow vel const real omega = 2*tipspeed_ratio*velocity/reference_diameter; diff --git a/apps/gpu/LBM/ActuatorLine/configActuatorLine.txt b/apps/gpu/LBM/ActuatorLine/configActuatorLine.txt index f637c47f2..fc8403bab 100644 --- a/apps/gpu/LBM/ActuatorLine/configActuatorLine.txt +++ b/apps/gpu/LBM/ActuatorLine/configActuatorLine.txt @@ -8,12 +8,12 @@ Path = /work/y0078217/Results/ActuatorLine105 GridPath=. 
################################################## ReferenceDiameter=126 -NodesPerDiameter=105 +NodesPerDiameter=32 Velocity=9 ################################################## tStartOut=0 -tOut=5000 -tEnd=5000 +tOut=1 +tEnd=1 ################################################## # tStartTmpAveraging=100 diff --git a/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.cpp b/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.cpp index 4136614df..26632f18d 100644 --- a/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.cpp +++ b/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.cpp @@ -13,8 +13,13 @@ #include "CollisionStrategy.h" #include "RefinementStrategy.h" +#include "Output/Timer.h" + void UpdateGrid27::updateGrid(int level, unsigned int t) { + + timer->startTimer(); + ////////////////////////////////////////////////////////////////////////// if (level != para->getFine()) { @@ -22,18 +27,27 @@ void UpdateGrid27::updateGrid(int level, unsigned int t) updateGrid(level + 1, t); } + ////////////////////////////////////////////////////////////////////////// + std::cout << "updateGrid: level = " << level << ", t = " << t << std::endl; + interactWithProbes(level, t); + std::cout << " interactWithProbes, " << timer->startStopGetElapsed() << std::endl; ////////////////////////////////////////////////////////////////////////// collision(this, para.get(), level, t); + std::cout << " collision, " << timer->startStopGetElapsed() << std::endl; + ////////////////////////////////////////////////////////////////////////// postCollisionBC(level, t); + std::cout << " postCollisionBC, " << timer->startStopGetElapsed() << std::endl; + + ////////////////////////////////////////////////////////////////////////// swapBetweenEvenAndOddTimestep(level); @@ -45,20 +59,35 @@ void UpdateGrid27::updateGrid(int level, unsigned int t) calcTurbulentViscosity(level); + + std::cout << " calcTurbulentViscosity, " << timer->startStopGetElapsed() << std::endl; + 
////////////////////////////////////////////////////////////////////////// this->preCollisionBC(level, t); + std::cout << " preCollisionBC, " << timer->startStopGetElapsed() << std::endl; + + ////////////////////////////////////////////////////////////////////////// if( level != para->getFine() ) { refinement(this, para.get(), level); } + + std::cout << " refinement, " << timer->startStopGetElapsed() << std::endl; + ////////////////////////////////////////////////////////////////////////// interactWithActuators(level, t); + std::cout << " interactWithActuators, " << timer->startStopGetElapsed() << std::endl; + std::cout << "total time, " << timer->getTotalElapsedTime() << std::endl; + timer->resetTimer(); + + + } void UpdateGrid27::collisionAllNodes(int level, unsigned int t) @@ -391,4 +420,6 @@ UpdateGrid27::UpdateGrid27(SPtr<Parameter> para, vf::gpu::Communicator &comm, SP this->bcKernelManager = std::make_shared<BCKernelManager>(para, bcFactory); this->adKernelManager = std::make_shared<ADKernelManager>(para); this->gridScalingKernelManager = std::make_shared<GridScalingKernelManager>(para, scalingFactory); + timer = new Timer("ALM blade performance"); + timer->initTimer(); } diff --git a/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.h b/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.h index 8ce2cf5bf..da6d53c17 100644 --- a/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.h +++ b/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.h @@ -17,6 +17,8 @@ class BoundaryConditionFactory; class GridScalingFactory; class TurbulenceModelFactory; class UpdateGrid27; +class Timer; + using CollisionStrategy = std::function<void (UpdateGrid27* updateGrid, Parameter* para, int level, unsigned int t)>; using RefinementStrategy = std::function<void (UpdateGrid27* updateGrid, Parameter* para, int level)>; @@ -84,6 +86,8 @@ private: std::shared_ptr<GridScalingKernelManager> gridScalingKernelManager; //! 
\property tmFactory is a shared pointer to an object of TurbulenceModelFactory std::shared_ptr<TurbulenceModelFactory> tmFactory; + + Timer* timer = nullptr; }; #endif diff --git a/src/gpu/VirtualFluids_GPU/Output/Timer.cpp b/src/gpu/VirtualFluids_GPU/Output/Timer.cpp index 74a706165..b9ec5d9ba 100644 --- a/src/gpu/VirtualFluids_GPU/Output/Timer.cpp +++ b/src/gpu/VirtualFluids_GPU/Output/Timer.cpp @@ -25,6 +25,13 @@ void Timer::stopTimer() this->totalElapsedTime += this->elapsedTime; } +float Timer::startStopGetElapsed() +{ + this->stopTimer(); + this->startTimer(); + return this->elapsedTime; +} + void Timer::resetTimer() { this->elapsedTime = 0.0; diff --git a/src/gpu/VirtualFluids_GPU/Output/Timer.h b/src/gpu/VirtualFluids_GPU/Output/Timer.h index d035cbb6c..f6b6bad3c 100644 --- a/src/gpu/VirtualFluids_GPU/Output/Timer.h +++ b/src/gpu/VirtualFluids_GPU/Output/Timer.h @@ -34,6 +34,8 @@ class Timer float getElapsedTime(){ return this->elapsedTime; } float getTotalElapsedTime(){ return this->totalElapsedTime; } + float startStopGetElapsed(); + private: cudaEvent_t start_t, stop_t; diff --git a/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/ActuatorFarm.cu b/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/ActuatorFarm.cu index 0e70df88e..72d349ff5 100644 --- a/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/ActuatorFarm.cu +++ b/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/ActuatorFarm.cu @@ -14,6 +14,9 @@ #include "GPU/CudaMemoryManager.h" #include <lbm/constants/NumericConstants.h> #include <logger/Logger.h> +#include <ostream> + +#include "Output/Timer.h" using namespace vf::lbm::constant; @@ -267,26 +270,38 @@ void ActuatorFarm::addTurbine(real posX, real posY, real posZ, real diameter, re void ActuatorFarm::init(Parameter* para, GridProvider* gridProvider, CudaMemoryManager* cudaMemoryManager) { - if(!para->getIsBodyForce()) throw std::runtime_error("try to allocate ActuatorFarm but BodyForce is not set in Parameter."); + if (!para->getIsBodyForce()) + 
throw std::runtime_error("try to allocate ActuatorFarm but BodyForce is not set in Parameter."); this->forceRatio = para->getForceRatio(); this->initTurbineGeometries(cudaMemoryManager); - this->initBladeCoords(cudaMemoryManager); + this->initBladeCoords(cudaMemoryManager); this->initBladeIndices(para, cudaMemoryManager); this->initBladeVelocities(cudaMemoryManager); - this->initBladeForces(cudaMemoryManager); - this->initBoundingSpheres(para, cudaMemoryManager); + this->initBladeForces(cudaMemoryManager); + this->initBoundingSpheres(para, cudaMemoryManager); this->streamIndex = 0; + + bladeTimer = new Timer("ALM blade performance"); + bladeTimer->initTimer(); } -void ActuatorFarm::interact(Parameter* para, CudaMemoryManager* cudaMemoryManager, int level, unsigned int t) +void ActuatorFarm::interact(Parameter* para, CudaMemoryManager* cudaMemoryManager, int currentLevel, unsigned int t) { - if (level != this->level) return; + if (currentLevel != this->level) return; + bool useTimer = false; cudaStream_t stream = para->getStreamManager()->getStream(CudaStreamIndex::ActuatorFarm, this->streamIndex); + if (useTimer) + std::cout << "ActuatorFarm::interact: level = " << currentLevel << ", t = " << t << " useHostArrays = " << useHostArrays <<std::endl; + bladeTimer->startTimer(); + if(useHostArrays) cudaMemoryManager->cudaCopyBladeCoordsHtoD(this); - vf::cuda::CudaGrid bladeGrid = vf::cuda::CudaGrid(para->getParH(level)->numberofthreads, this->numberOfNodes); + vf::cuda::CudaGrid bladeGrid = vf::cuda::CudaGrid(para->getParH(currentLevel)->numberofthreads, this->numberOfNodes); + + if (useTimer) + std::cout << " cudaCopyBladeCoordsHtoD, " << bladeTimer->startStopGetElapsed() << std::endl; interpolateVelocities<<< bladeGrid.grid, bladeGrid.threads, 0, stream >>>( para->getParD(this->level)->coordinateX, para->getParD(this->level)->coordinateY, para->getParD(this->level)->coordinateZ, @@ -299,14 +314,28 @@ void ActuatorFarm::interact(Parameter* para, CudaMemoryManager* 
cudaMemoryManage this->turbinePosXD, this->turbinePosYD, this->turbinePosZD, this->bladeIndicesD, para->getVelocityRatio(), this->invDeltaX); + if (useTimer) + std::cout << " interpolateVelocities, " << bladeTimer->startStopGetElapsed() << std::endl; + cudaStreamSynchronize(stream); if(useHostArrays) cudaMemoryManager->cudaCopyBladeVelocitiesDtoH(this); + + if (useTimer) + std::cout << " cudaCopyBladeVelocitiesDtoH, " << bladeTimer->startStopGetElapsed() << std::endl; + this->calcBladeForces(); + + if (useTimer) + std::cout << " calcBladeForces, " << bladeTimer->startStopGetElapsed() << std::endl; + this->swapDeviceArrays(); if(useHostArrays) cudaMemoryManager->cudaCopyBladeForcesHtoD(this); - vf::cuda::CudaGrid sphereGrid = vf::cuda::CudaGrid(para->getParH(level)->numberofthreads, this->numberOfIndices); + if (useTimer) + std::cout << " cudaCopyBladeForcesHtoD, " << bladeTimer->startStopGetElapsed() << std::endl; + + vf::cuda::CudaGrid sphereGrid = vf::cuda::CudaGrid(para->getParH(currentLevel)->numberofthreads, this->numberOfIndices); applyBodyForces<<<sphereGrid.grid, sphereGrid.threads, 0, stream>>>( para->getParD(this->level)->coordinateX, para->getParD(this->level)->coordinateY, para->getParD(this->level)->coordinateZ, @@ -318,12 +347,25 @@ void ActuatorFarm::interact(Parameter* para, CudaMemoryManager* cudaMemoryManage this->turbinePosXD, this->turbinePosYD, this->turbinePosZD, this->boundingSphereIndicesD, this->numberOfIndices, this->invEpsilonSqrd, this->factorGaussian); + + if (useTimer) + std::cout << " applyBodyForces, " << bladeTimer->startStopGetElapsed() << std::endl; + cudaMemoryManager->cudaCopyBladeOrientationsHtoD(this); + + if (useTimer) + std::cout << " cudaCopyBladeOrientationsHtoD, " << bladeTimer->startStopGetElapsed() << std::endl; + if (useTimer) + std::cout << "total time, " << bladeTimer->getTotalElapsedTime() << std::endl; + bladeTimer->resetTimer(); + cudaStreamSynchronize(stream); + + } -void ActuatorFarm::free(Parameter* para, 
CudaMemoryManager* cudaMemoryManager) +void ActuatorFarm::free(Parameter* /*para*/, CudaMemoryManager* cudaMemoryManager) { cudaMemoryManager->cudaFreeBladeGeometries(this); cudaMemoryManager->cudaFreeBladeOrientations(this); diff --git a/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/ActuatorFarm.h b/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/ActuatorFarm.h index 8e21cdb6b..e6066e286 100644 --- a/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/ActuatorFarm.h +++ b/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/ActuatorFarm.h @@ -10,6 +10,7 @@ using namespace vf::lbm::constant; class Parameter; class GridProvider; +class Timer; using namespace vf::lbm::constant; class ActuatorFarm : public PreCollisionInteractor @@ -192,6 +193,8 @@ private: uint numberOfNodes; real forceRatio, factorGaussian, invEpsilonSqrd, invDeltaX; int streamIndex; + + Timer* bladeTimer = nullptr; }; #endif -- GitLab