From 72b699b325afd48c5cc9f27ccf594fcfca0a6b21 Mon Sep 17 00:00:00 2001 From: Hkorb <henry.korb@geo.uu.se> Date: Fri, 17 Sep 2021 17:43:18 +0200 Subject: [PATCH] refactor probe --- .../GPU/CudaMemoryManager.cpp | 41 +++--- .../VirtualFluids_GPU/GPU/CudaMemoryManager.h | 8 +- src/gpu/VirtualFluids_GPU/Visitor/Probe.cu | 121 +++++++++--------- src/gpu/VirtualFluids_GPU/Visitor/Probe.h | 16 +-- 4 files changed, 86 insertions(+), 100 deletions(-) diff --git a/src/gpu/VirtualFluids_GPU/GPU/CudaMemoryManager.cpp b/src/gpu/VirtualFluids_GPU/GPU/CudaMemoryManager.cpp index 75e453282..84104b990 100644 --- a/src/gpu/VirtualFluids_GPU/GPU/CudaMemoryManager.cpp +++ b/src/gpu/VirtualFluids_GPU/GPU/CudaMemoryManager.cpp @@ -2916,37 +2916,34 @@ void CudaMemoryManager::cudaFreeProbeQuantityArray(Probe* probe, int level) checkCudaErrors( cudaFree (probe->getProbeStruct(level)->quantitiesArrayD) ); } -void CudaMemoryManager::cudaAllocProbeQuantities(Probe* probe, int level) +void CudaMemoryManager::cudaAllocProbeQuantitiesAndOffsets(Probe* probe, int level) { - size_t tmpV = probe->getPostProcessingVariables().size()*sizeof(int); - size_t tmpO = int(PostProcessingVariable::LAST)*sizeof(int); - - checkCudaErrors( cudaMallocHost((void**) &probe->getProbeStruct(level)->arrayOffsetsH, tmpO) ); - checkCudaErrors( cudaMallocHost((void**) &probe->getProbeStruct(level)->quantitiesH, tmpV) ); - - checkCudaErrors( cudaMalloc ((void**) &probe->getProbeStruct(level)->arrayOffsetsD, tmpO) ); - checkCudaErrors( cudaMalloc ((void**) &probe->getProbeStruct(level)->quantitiesD, tmpV) ); - setMemsizeGPU(tmpO+tmpV, false); + size_t tmpA = int(PostProcessingVariable::LAST)*sizeof(int); + size_t tmpQ = int(PostProcessingVariable::LAST)*sizeof(bool); + checkCudaErrors( cudaMallocHost((void**) &probe->getProbeStruct(level)->quantitiesH, tmpQ) ); + checkCudaErrors( cudaMalloc ((void**) &probe->getProbeStruct(level)->quantitiesD, tmpQ) ); + checkCudaErrors( cudaMallocHost((void**) &probe->getProbeStruct(level)->arrayOffsetsH, tmpA) ); + checkCudaErrors( cudaMalloc ((void**) &probe->getProbeStruct(level)->arrayOffsetsD, tmpA) ); + setMemsizeGPU(tmpA+tmpQ, false); } -void CudaMemoryManager::cudaCopyProbeQuantitiesHtoD(Probe* probe, int level) -{ - checkCudaErrors( cudaMemcpy(probe->getProbeStruct(level)->arrayOffsetsD, probe->getProbeStruct(level)->arrayOffsetsH, probe->getPostProcessingVariables().size()*sizeof(real), cudaMemcpyHostToDevice) ); - checkCudaErrors( cudaMemcpy(probe->getProbeStruct(level)->quantitiesD, probe->getProbeStruct(level)->quantitiesH, probe->getPostProcessingVariables().size()*sizeof(real), cudaMemcpyHostToDevice) ); - +void CudaMemoryManager::cudaCopyProbeQuantitiesAndOffsetsHtoD(Probe* probe, int level) +{ + checkCudaErrors( cudaMemcpy(probe->getProbeStruct(level)->quantitiesD, probe->getProbeStruct(level)->quantitiesH, int(PostProcessingVariable::LAST)*sizeof(bool), cudaMemcpyHostToDevice) ); + checkCudaErrors( cudaMemcpy(probe->getProbeStruct(level)->arrayOffsetsD, probe->getProbeStruct(level)->arrayOffsetsH, int(PostProcessingVariable::LAST)*sizeof(int), cudaMemcpyHostToDevice) ); } -void CudaMemoryManager::cudaCopyProbeQuantitiesDtoH(Probe* probe, int level) + +void CudaMemoryManager::cudaCopyProbeQuantitiesAndOffsetsDtoH(Probe* probe, int level) { - checkCudaErrors( cudaMemcpy(probe->getProbeStruct(level)->arrayOffsetsH, probe->getProbeStruct(level)->arrayOffsetsD, probe->getPostProcessingVariables().size()*sizeof(real), cudaMemcpyDeviceToHost) ); - checkCudaErrors( cudaMemcpy(probe->getProbeStruct(level)->quantitiesH, probe->getProbeStruct(level)->quantitiesD, probe->getPostProcessingVariables().size()*sizeof(real), cudaMemcpyDeviceToHost) ); + checkCudaErrors( cudaMemcpy(probe->getProbeStruct(level)->quantitiesH, probe->getProbeStruct(level)->quantitiesD, int(PostProcessingVariable::LAST)*sizeof(bool), cudaMemcpyDeviceToHost) ); + checkCudaErrors( cudaMemcpy(probe->getProbeStruct(level)->arrayOffsetsH, probe->getProbeStruct(level)->arrayOffsetsD, int(PostProcessingVariable::LAST)*sizeof(int), cudaMemcpyDeviceToHost) ); } -void CudaMemoryManager::cudaFreeProbeQuantities(Probe* probe, int level) +void CudaMemoryManager::cudaFreeProbeQuantitiesAndOffsets(Probe* probe, int level) { - checkCudaErrors( cudaFreeHost(probe->getProbeStruct(level)->arrayOffsetsH) ); checkCudaErrors( cudaFreeHost(probe->getProbeStruct(level)->quantitiesH) ); - - checkCudaErrors( cudaFree (probe->getProbeStruct(level)->arrayOffsetsD) ); + checkCudaErrors( cudaFreeHost(probe->getProbeStruct(level)->arrayOffsetsH) ); checkCudaErrors( cudaFree (probe->getProbeStruct(level)->quantitiesD) ); + checkCudaErrors( cudaFree (probe->getProbeStruct(level)->arrayOffsetsD) ); } diff --git a/src/gpu/VirtualFluids_GPU/GPU/CudaMemoryManager.h b/src/gpu/VirtualFluids_GPU/GPU/CudaMemoryManager.h index b11cac610..1d1311bf1 100644 --- a/src/gpu/VirtualFluids_GPU/GPU/CudaMemoryManager.h +++ b/src/gpu/VirtualFluids_GPU/GPU/CudaMemoryManager.h @@ -367,10 +367,10 @@ public: void cudaCopyProbeQuantityArrayDtoH(Probe* probe, int level); void cudaFreeProbeQuantityArray(Probe* probe, int level); - void cudaAllocProbeQuantities(Probe* probe, int level); - void cudaCopyProbeQuantitiesHtoD(Probe* probe, int level); - void cudaCopyProbeQuantitiesDtoH(Probe* probe, int level); - void cudaFreeProbeQuantities(Probe* probe, int level); + void cudaAllocProbeQuantitiesAndOffsets(Probe* probe, int level); + void cudaCopyProbeQuantitiesAndOffsetsHtoD(Probe* probe, int level); + void cudaCopyProbeQuantitiesAndOffsetsDtoH(Probe* probe, int level); + void cudaFreeProbeQuantitiesAndOffsets(Probe* probe, int level); private: CudaMemoryManager(std::shared_ptr<Parameter> parameter); diff --git a/src/gpu/VirtualFluids_GPU/Visitor/Probe.cu b/src/gpu/VirtualFluids_GPU/Visitor/Probe.cu index 7cd1ad872..951e8804d 100644 --- a/src/gpu/VirtualFluids_GPU/Visitor/Probe.cu +++ b/src/gpu/VirtualFluids_GPU/Visitor/Probe.cu @@ -20,8 +20,7 @@ __global__ void interpQuantities( uint* pointIndices, real* distX, real* distY, real* distZ, real* vx, real* vy, real* vz, real* rho, uint* neighborX, uint* neighborY, uint* neighborZ, - // real* vx_point, real* vy_point, real* vz_point, real* rho_point, - PostProcessingVariable* PostProcessingVariables, + bool* quantities, uint* quantityArrayOffsets, real* quantityArray ) { @@ -61,30 +60,23 @@ __global__ void interpQuantities( uint* pointIndices, //TODO change computation of means and variances to something more stable, see https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Welford's_online_algorithm - for( int variableIndex = 0; PostProcessingVariables[variableIndex] != PostProcessingVariable::LAST; variableIndex++) + if(quantities[int(PostProcessingVariable::Means)]) { - PostProcessingVariable variable = PostProcessingVariables[variableIndex]; - uint arrayOffset = quantityArrayOffsets[int(variable)]*nPoints; + uint arrayOffset = quantityArrayOffsets[int(PostProcessingVariable::Means)]*nPoints; - switch(variable) - { - case PostProcessingVariable::Means: - { - // printf("u_interp: %f \n", u_interpX); - quantityArray[arrayOffset+node] += u_interpX; arrayOffset += nPoints; - quantityArray[arrayOffset+node] += u_interpY; arrayOffset += nPoints; - quantityArray[arrayOffset+node] += u_interpZ; arrayOffset += nPoints; - quantityArray[arrayOffset+node] += rho_interp; - } break; - case PostProcessingVariable::Variances: - { - quantityArray[arrayOffset+node] += pow(u_interpX, 2.f); arrayOffset += nPoints; - quantityArray[arrayOffset+node] += pow(u_interpY, 2.f); arrayOffset += nPoints; - quantityArray[arrayOffset+node] += pow(u_interpZ, 2.f); arrayOffset += nPoints; - quantityArray[arrayOffset+node] += pow(rho_interp, 2.f); - } break; - default: break; - } + quantityArray[arrayOffset+node] += u_interpX; arrayOffset += nPoints; + quantityArray[arrayOffset+node] += u_interpY; arrayOffset += nPoints; + quantityArray[arrayOffset+node] += u_interpZ; arrayOffset += nPoints; + quantityArray[arrayOffset+node] += rho_interp; + } + if(quantities[int(PostProcessingVariable::Variances)]) + { + uint arrayOffset = quantityArrayOffsets[int(PostProcessingVariable::Variances)]*nPoints; + + quantityArray[arrayOffset+node] += pow(u_interpX, 2.f); arrayOffset += nPoints; + quantityArray[arrayOffset+node] += pow(u_interpY, 2.f); arrayOffset += nPoints; + quantityArray[arrayOffset+node] += pow(u_interpZ, 2.f); arrayOffset += nPoints; + quantityArray[arrayOffset+node] += pow(rho_interp, 2.f); } } @@ -94,11 +86,6 @@ void Probe::init(Parameter* para, GridProvider* gridProvider, CudaMemoryManager* probeParams.resize(para->getMaxLevel()+1); - //Remove double entries - std::unordered_set<PostProcessingVariable> s(this->postProcessingVariables.begin(), this->postProcessingVariables.end()); - this->postProcessingVariables.assign(s.begin(), s.end()); - this->addPostProcessingVariable(PostProcessingVariable::LAST); - for(int level=0; level<=para->getMaxLevel(); level++) { std::vector<int> probeIndices_level; @@ -155,23 +142,28 @@ void Probe::init(Parameter* para, GridProvider* gridProvider, CudaMemoryManager* uint arrOffset = 0; - cudaManager->cudaAllocProbeQuantities(this, level); + cudaManager->cudaAllocProbeQuantitiesAndOffsets(this, level); - for(PostProcessingVariable variable: this->postProcessingVariables) + for( int var=0; var<int(PostProcessingVariable::LAST); var++) { - probeParams[level]->arrayOffsetsH[int(variable)] = arrOffset; - switch(variable) + if(this->quantities[var]) { - case PostProcessingVariable::Means: arrOffset += 4; break; - case PostProcessingVariable::Variances: arrOffset += 4; break; - default: break; + probeParams[level]->quantitiesH[var] = true; + probeParams[level]->arrayOffsetsH[var] = arrOffset; + switch(static_cast<PostProcessingVariable>(var)) + { + case PostProcessingVariable::Means: arrOffset += 4; break; + case PostProcessingVariable::Variances: arrOffset += 4; break; + default: break; + } } } + + cudaManager->cudaCopyProbeQuantitiesAndOffsetsHtoD(this, level); probeParams[level]->nArrays = arrOffset; + cudaManager->cudaAllocProbeQuantityArray(this, level); - std::copy(this->postProcessingVariables.begin(), this->postProcessingVariables.end(), probeParams[level]->quantitiesH); - cudaManager->cudaCopyProbeQuantitiesHtoD(this, level); for(uint arr=0; arr<probeParams[level]->nArrays; arr++) { @@ -211,7 +203,7 @@ void Probe::free(Parameter* para, CudaMemoryManager* cudaManager) cudaManager->cudaFreeProbeDistances(this, level); cudaManager->cudaFreeProbeIndices(this, level); cudaManager->cudaFreeProbeQuantityArray(this, level); - cudaManager->cudaFreeProbeQuantities(this, level); + cudaManager->cudaFreeProbeQuantitiesAndOffsets(this, level); } } @@ -250,13 +242,14 @@ void Probe::setProbePointsFromXNormalPlane(real pos_x, real pos0_y, real pos0_z, this->setProbePointsFromList(pointCoordsXtmp, pointCoordsYtmp, pointCoordsZtmp); } -void Probe::addPostProcessingVariable(PostProcessingVariable _variable) +void Probe::addPostProcessingVariable(PostProcessingVariable variable) { - this->postProcessingVariables.push_back(_variable); - switch(_variable) + this->quantities[int(variable)] = true; + switch(static_cast<PostProcessingVariable>(variable)) { case PostProcessingVariable::Means: break; - case PostProcessingVariable::Variances: this->postProcessingVariables.push_back(PostProcessingVariable::Means); break; + case PostProcessingVariable::Variances: + this->addPostProcessingVariable(PostProcessingVariable::Means); break; default: break; } } @@ -351,16 +344,18 @@ void Probe::writeGridFile(Parameter* para, int level, std::vector<std::string>& for( auto it=nodedata.begin(); it!=nodedata.end(); it++) it->resize(sizeOfNodes); //TODO maybe change order of loops, could be faster, maybe not important, also still very ugly - for (unsigned int pos = startpos; pos < endpos; pos++) + for (uint pos = startpos; pos < endpos; pos++) { int dn = pos-startpos; ////////////////////////////////////////////////////////////////////////// int offset = 0; - for(PostProcessingVariable variable: this->postProcessingVariables) + for( int var=0; var < int(PostProcessingVariable::LAST); var++) { - int arrOffset = this->getProbeStruct(level)->arrayOffsetsH[int(variable)]*this->getProbeStruct(level)->nPoints; - switch(variable) + if(this->quantities[var]) { + int arrOffset = this->getProbeStruct(level)->arrayOffsetsH[var]*this->getProbeStruct(level)->nPoints; + switch(static_cast<PostProcessingVariable>(var)) + { case PostProcessingVariable::Means: { nodedata[offset][dn] = (double)this->getProbeStruct(level)->quantitiesArrayH[arrOffset+pos]*para->getVelocityRatio()*inv_t; arrOffset+=probeParams[level]->nPoints; offset++; @@ -377,7 +372,9 @@ void Probe::writeGridFile(Parameter* para, int level, std::vector<std::string>& nodedata[offset][dn] = double(this->getProbeStruct(level)->quantitiesArrayH[arrOffset+pos]*inv_t - pow(this->getProbeStruct(level)->quantitiesArrayH[arrOffset+meansShift+pos]*inv_t,2))*pow(para->getVelocityRatio(),2); arrOffset+=probeParams[level]->nPoints; offset++; } break; default: break; + } } + } } WbWriterVtkXmlBinary::getInstance()->writeNodesWithNodeData(fnames[part], nodes, nodedatanames, nodedata); @@ -387,24 +384,20 @@ void Probe::writeGridFile(Parameter* para, int level, std::vector<std::string>& std::vector<std::string> Probe::getVarNames() { std::vector<std::string> varNames; - for(PostProcessingVariable variable: this->postProcessingVariables) + if(this->quantities[int(PostProcessingVariable::Means)]) { - switch(variable) - { - case PostProcessingVariable::Means: - varNames.push_back("vx_mean"); - varNames.push_back("vy_mean"); - varNames.push_back("vz_mean"); - varNames.push_back("rho_mean"); - break; - case PostProcessingVariable::Variances: - varNames.push_back("vx_var"); - varNames.push_back("vy_var"); - varNames.push_back("vz_var"); - varNames.push_back("rho_var"); - break; - default: break; - } + varNames.push_back("vx_mean"); + varNames.push_back("vy_mean"); + varNames.push_back("vz_mean"); + varNames.push_back("rho_mean"); + } + + if(this->quantities[int(PostProcessingVariable::Variances)]) + { + varNames.push_back("vx_var"); + varNames.push_back("vy_var"); + varNames.push_back("vz_var"); + varNames.push_back("rho_var"); } return varNames; } diff --git a/src/gpu/VirtualFluids_GPU/Visitor/Probe.h b/src/gpu/VirtualFluids_GPU/Visitor/Probe.h index 5f5c7ea82..7cd98e02f 100644 --- a/src/gpu/VirtualFluids_GPU/Visitor/Probe.h +++ b/src/gpu/VirtualFluids_GPU/Visitor/Probe.h @@ -13,17 +13,17 @@ enum class PostProcessingVariable{ // In init add number of arrays + offset in switch statement // If new quantity depends on other quantities i.e. mean, catch in addPostProcessingVariable Means = 0, - Variances = 4, - LAST = 8, + Variances = 1, + LAST = 2, }; struct ProbeStruct{ uint nPoints, nArrays; uint *pointIndicesH, *pointIndicesD; real *pointCoordsX, *pointCoordsY, *pointCoordsZ; real *distXH, *distYH, *distZH, *distXD, *distYD, *distZD; - real* quantitiesArrayH, *quantitiesArrayD; - PostProcessingVariable* quantitiesH,* quantitiesD; - uint* arrayOffsetsH, *arrayOffsetsD; + real *quantitiesArrayH, *quantitiesArrayD; + bool *quantitiesH, *quantitiesD; + uint *arrayOffsetsH, *arrayOffsetsD; }; @@ -48,7 +48,6 @@ public: void free(Parameter* para, CudaMemoryManager* cudaManager); ProbeStruct* getProbeStruct(int level){ return this->probeParams[level]; } - std::vector<PostProcessingVariable> getPostProcessingVariables(){return this->postProcessingVariables; } void setProbePointsFromList(std::vector<real> &_pointCoordsX, std::vector<real> &_pointCoordsY, std::vector<real> &_pointCoordsZ); void setProbePointsFromXNormalPlane(real pos_x, real pos0_y, real pos0_z, real pos1_y, real pos1_z, real delta_y, real delta_z); @@ -66,15 +65,12 @@ private: uint nProbePoints; std::vector<ProbeStruct*> probeParams; - std::vector<PostProcessingVariable> postProcessingVariables; + bool quantities[int(PostProcessingVariable::LAST)]; std::vector<std::string> fileNamesForCollectionFile; std::vector<std::string> varNames; uint tStart; uint tOut; - // int* pointIndicesH, *pointIndicesD; - // std::vector< std::vector<int> > probeIndices; - // std::vector< std::vector<real> > distX, distY, distZ; }; #endif \ No newline at end of file -- GitLab