From 72b699b325afd48c5cc9f27ccf594fcfca0a6b21 Mon Sep 17 00:00:00 2001
From: Hkorb <henry.korb@geo.uu.se>
Date: Fri, 17 Sep 2021 17:43:18 +0200
Subject: [PATCH] refactor probe

---
 .../GPU/CudaMemoryManager.cpp                 |  41 +++---
 .../VirtualFluids_GPU/GPU/CudaMemoryManager.h |   8 +-
 src/gpu/VirtualFluids_GPU/Visitor/Probe.cu    | 121 +++++++++---------
 src/gpu/VirtualFluids_GPU/Visitor/Probe.h     |  16 +--
 4 files changed, 86 insertions(+), 100 deletions(-)

diff --git a/src/gpu/VirtualFluids_GPU/GPU/CudaMemoryManager.cpp b/src/gpu/VirtualFluids_GPU/GPU/CudaMemoryManager.cpp
index 75e453282..84104b990 100644
--- a/src/gpu/VirtualFluids_GPU/GPU/CudaMemoryManager.cpp
+++ b/src/gpu/VirtualFluids_GPU/GPU/CudaMemoryManager.cpp
@@ -2916,37 +2916,34 @@ void CudaMemoryManager::cudaFreeProbeQuantityArray(Probe* probe, int level)
     checkCudaErrors( cudaFree    (probe->getProbeStruct(level)->quantitiesArrayD) );
 }
 
-void CudaMemoryManager::cudaAllocProbeQuantities(Probe* probe, int level)
+void CudaMemoryManager::cudaAllocProbeQuantitiesAndOffsets(Probe* probe, int level)
 {
-    size_t tmpV = probe->getPostProcessingVariables().size()*sizeof(int);
-    size_t tmpO = int(PostProcessingVariable::LAST)*sizeof(int);
-    
-    checkCudaErrors( cudaMallocHost((void**) &probe->getProbeStruct(level)->arrayOffsetsH, tmpO) );    
-    checkCudaErrors( cudaMallocHost((void**) &probe->getProbeStruct(level)->quantitiesH, tmpV) );
-
-    checkCudaErrors( cudaMalloc    ((void**) &probe->getProbeStruct(level)->arrayOffsetsD, tmpO) );
-    checkCudaErrors( cudaMalloc    ((void**) &probe->getProbeStruct(level)->quantitiesD, tmpV) );
-    setMemsizeGPU(tmpO+tmpV, false);
+    size_t tmpA = int(PostProcessingVariable::LAST)*sizeof(int);
+    size_t tmpQ = int(PostProcessingVariable::LAST)*sizeof(bool);
+    checkCudaErrors( cudaMallocHost((void**) &probe->getProbeStruct(level)->quantitiesH, tmpQ) );    
+    checkCudaErrors( cudaMalloc    ((void**) &probe->getProbeStruct(level)->quantitiesD, tmpQ) );
+    checkCudaErrors( cudaMallocHost((void**) &probe->getProbeStruct(level)->arrayOffsetsH, tmpA) );    
+    checkCudaErrors( cudaMalloc    ((void**) &probe->getProbeStruct(level)->arrayOffsetsD, tmpA) );
+    setMemsizeGPU(tmpA+tmpQ, false);
 }
 
-void CudaMemoryManager::cudaCopyProbeQuantitiesHtoD(Probe* probe, int level)
-{
-    checkCudaErrors( cudaMemcpy(probe->getProbeStruct(level)->arrayOffsetsD, probe->getProbeStruct(level)->arrayOffsetsH, probe->getPostProcessingVariables().size()*sizeof(real), cudaMemcpyHostToDevice) );
-    checkCudaErrors( cudaMemcpy(probe->getProbeStruct(level)->quantitiesD, probe->getProbeStruct(level)->quantitiesH, probe->getPostProcessingVariables().size()*sizeof(real), cudaMemcpyHostToDevice) );
-
+void CudaMemoryManager::cudaCopyProbeQuantitiesAndOffsetsHtoD(Probe* probe, int level)
+{    
+    checkCudaErrors( cudaMemcpy(probe->getProbeStruct(level)->quantitiesD, probe->getProbeStruct(level)->quantitiesH, int(PostProcessingVariable::LAST)*sizeof(bool), cudaMemcpyHostToDevice) );
+    checkCudaErrors( cudaMemcpy(probe->getProbeStruct(level)->arrayOffsetsD, probe->getProbeStruct(level)->arrayOffsetsH, int(PostProcessingVariable::LAST)*sizeof(int), cudaMemcpyHostToDevice) );
 }
-void CudaMemoryManager::cudaCopyProbeQuantitiesDtoH(Probe* probe, int level)
+
+void CudaMemoryManager::cudaCopyProbeQuantitiesAndOffsetsDtoH(Probe* probe, int level)
 {
-    checkCudaErrors( cudaMemcpy(probe->getProbeStruct(level)->arrayOffsetsH, probe->getProbeStruct(level)->arrayOffsetsD, probe->getPostProcessingVariables().size()*sizeof(real), cudaMemcpyDeviceToHost) );
-    checkCudaErrors( cudaMemcpy(probe->getProbeStruct(level)->quantitiesH, probe->getProbeStruct(level)->quantitiesD, probe->getPostProcessingVariables().size()*sizeof(real), cudaMemcpyDeviceToHost) );
+    checkCudaErrors( cudaMemcpy(probe->getProbeStruct(level)->quantitiesH, probe->getProbeStruct(level)->quantitiesD, int(PostProcessingVariable::LAST)*sizeof(bool), cudaMemcpyDeviceToHost) );
+    checkCudaErrors( cudaMemcpy(probe->getProbeStruct(level)->arrayOffsetsH, probe->getProbeStruct(level)->arrayOffsetsD, int(PostProcessingVariable::LAST)*sizeof(int), cudaMemcpyDeviceToHost) );
 }
-void CudaMemoryManager::cudaFreeProbeQuantities(Probe* probe, int level)
+void CudaMemoryManager::cudaFreeProbeQuantitiesAndOffsets(Probe* probe, int level)
 {
-    checkCudaErrors( cudaFreeHost(probe->getProbeStruct(level)->arrayOffsetsH) );
     checkCudaErrors( cudaFreeHost(probe->getProbeStruct(level)->quantitiesH) );
-
-    checkCudaErrors( cudaFree    (probe->getProbeStruct(level)->arrayOffsetsD) );
+    checkCudaErrors( cudaFreeHost(probe->getProbeStruct(level)->arrayOffsetsH) );
     checkCudaErrors( cudaFree    (probe->getProbeStruct(level)->quantitiesD) );
+    checkCudaErrors( cudaFree    (probe->getProbeStruct(level)->arrayOffsetsD) );
 }
 
 
diff --git a/src/gpu/VirtualFluids_GPU/GPU/CudaMemoryManager.h b/src/gpu/VirtualFluids_GPU/GPU/CudaMemoryManager.h
index b11cac610..1d1311bf1 100644
--- a/src/gpu/VirtualFluids_GPU/GPU/CudaMemoryManager.h
+++ b/src/gpu/VirtualFluids_GPU/GPU/CudaMemoryManager.h
@@ -367,10 +367,10 @@ public:
     void cudaCopyProbeQuantityArrayDtoH(Probe* probe, int level);
     void cudaFreeProbeQuantityArray(Probe* probe, int level);
     
-    void cudaAllocProbeQuantities(Probe* probe, int level);
-    void cudaCopyProbeQuantitiesHtoD(Probe* probe, int level);
-    void cudaCopyProbeQuantitiesDtoH(Probe* probe, int level);
-    void cudaFreeProbeQuantities(Probe* probe, int level);
+    void cudaAllocProbeQuantitiesAndOffsets(Probe* probe, int level);
+    void cudaCopyProbeQuantitiesAndOffsetsHtoD(Probe* probe, int level);
+    void cudaCopyProbeQuantitiesAndOffsetsDtoH(Probe* probe, int level);
+    void cudaFreeProbeQuantitiesAndOffsets(Probe* probe, int level);
 
 private:
     CudaMemoryManager(std::shared_ptr<Parameter> parameter);
diff --git a/src/gpu/VirtualFluids_GPU/Visitor/Probe.cu b/src/gpu/VirtualFluids_GPU/Visitor/Probe.cu
index 7cd1ad872..951e8804d 100644
--- a/src/gpu/VirtualFluids_GPU/Visitor/Probe.cu
+++ b/src/gpu/VirtualFluids_GPU/Visitor/Probe.cu
@@ -20,8 +20,7 @@ __global__ void interpQuantities(   uint* pointIndices,
                                     real* distX, real* distY, real* distZ,
                                     real* vx, real* vy, real* vz, real* rho,            
                                     uint* neighborX, uint* neighborY, uint* neighborZ,
-                                    // real* vx_point, real* vy_point, real* vz_point, real* rho_point,
-                                    PostProcessingVariable* PostProcessingVariables,
+                                    bool* quantities,
                                     uint* quantityArrayOffsets, real* quantityArray
                                 )
 {
@@ -61,30 +60,23 @@ __global__ void interpQuantities(   uint* pointIndices,
 
     //TODO change computation of  means and variances to something more stable, see https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Welford's_online_algorithm
 
-    for( int variableIndex = 0; PostProcessingVariables[variableIndex] != PostProcessingVariable::LAST; variableIndex++)
+    if(quantities[int(PostProcessingVariable::Means)])
     {
-        PostProcessingVariable variable = PostProcessingVariables[variableIndex];
-        uint arrayOffset = quantityArrayOffsets[int(variable)]*nPoints;
+        uint arrayOffset = quantityArrayOffsets[int(PostProcessingVariable::Means)]*nPoints;
 
-        switch(variable)
-        {
-            case PostProcessingVariable::Means:
-            {
-                // printf("u_interp: %f \n", u_interpX);
-                quantityArray[arrayOffset+node] += u_interpX; arrayOffset += nPoints;
-                quantityArray[arrayOffset+node] += u_interpY; arrayOffset += nPoints;
-                quantityArray[arrayOffset+node] += u_interpZ; arrayOffset += nPoints;
-                quantityArray[arrayOffset+node] += rho_interp;
-            } break;
-            case PostProcessingVariable::Variances:
-            { 
-                quantityArray[arrayOffset+node] += pow(u_interpX, 2.f); arrayOffset += nPoints;
-                quantityArray[arrayOffset+node] += pow(u_interpY, 2.f); arrayOffset += nPoints;
-                quantityArray[arrayOffset+node] += pow(u_interpZ, 2.f); arrayOffset += nPoints;
-                quantityArray[arrayOffset+node] += pow(rho_interp, 2.f); 
-            } break;
-            default: break;
-        }
+        quantityArray[arrayOffset+node] += u_interpX; arrayOffset += nPoints;
+        quantityArray[arrayOffset+node] += u_interpY; arrayOffset += nPoints;
+        quantityArray[arrayOffset+node] += u_interpZ; arrayOffset += nPoints;
+        quantityArray[arrayOffset+node] += rho_interp;
+    }
+    if(quantities[int(PostProcessingVariable::Variances)])
+    {
+        uint arrayOffset = quantityArrayOffsets[int(PostProcessingVariable::Variances)]*nPoints;
+
+        quantityArray[arrayOffset+node] += pow(u_interpX, 2.f); arrayOffset += nPoints;
+        quantityArray[arrayOffset+node] += pow(u_interpY, 2.f); arrayOffset += nPoints;
+        quantityArray[arrayOffset+node] += pow(u_interpZ, 2.f); arrayOffset += nPoints;
+        quantityArray[arrayOffset+node] += pow(rho_interp, 2.f); 
     }
 }
 
@@ -94,11 +86,6 @@ void Probe::init(Parameter* para, GridProvider* gridProvider, CudaMemoryManager*
 
     probeParams.resize(para->getMaxLevel()+1);
 
-    //Remove double entries
-    std::unordered_set<PostProcessingVariable> s(this->postProcessingVariables.begin(), this->postProcessingVariables.end());
-    this->postProcessingVariables.assign(s.begin(), s.end());
-    this->addPostProcessingVariable(PostProcessingVariable::LAST);
-
     for(int level=0; level<=para->getMaxLevel(); level++)
     {
         std::vector<int> probeIndices_level;
@@ -155,23 +142,28 @@ void Probe::init(Parameter* para, GridProvider* gridProvider, CudaMemoryManager*
 
         uint arrOffset = 0;
 
-        cudaManager->cudaAllocProbeQuantities(this, level);
+        cudaManager->cudaAllocProbeQuantitiesAndOffsets(this, level);
 
-        for(PostProcessingVariable variable: this->postProcessingVariables)
+        for( int var=0; var<int(PostProcessingVariable::LAST); var++)
         {
-            probeParams[level]->arrayOffsetsH[int(variable)] = arrOffset;
-            switch(variable)
+            if(this->quantities[var])
             {
-                case PostProcessingVariable::Means: arrOffset += 4; break;                
-                case PostProcessingVariable::Variances: arrOffset += 4; break;
-                default: break;
+                probeParams[level]->quantitiesH[var] = true;
+                probeParams[level]->arrayOffsetsH[var] = arrOffset;
+                switch(static_cast<PostProcessingVariable>(var))
+                {
+                    case PostProcessingVariable::Means: arrOffset += 4; break;                
+                    case PostProcessingVariable::Variances: arrOffset += 4; break;
+                    default: break;
+                }
             }
         }
+        
+        cudaManager->cudaCopyProbeQuantitiesAndOffsetsHtoD(this, level);
 
         probeParams[level]->nArrays = arrOffset;
+
         cudaManager->cudaAllocProbeQuantityArray(this, level);
-        std::copy(this->postProcessingVariables.begin(), this->postProcessingVariables.end(), probeParams[level]->quantitiesH);
-        cudaManager->cudaCopyProbeQuantitiesHtoD(this, level);
 
         for(uint arr=0; arr<probeParams[level]->nArrays; arr++)
         {
@@ -211,7 +203,7 @@ void Probe::free(Parameter* para, CudaMemoryManager* cudaManager)
         cudaManager->cudaFreeProbeDistances(this, level);
         cudaManager->cudaFreeProbeIndices(this, level);
         cudaManager->cudaFreeProbeQuantityArray(this, level);
-        cudaManager->cudaFreeProbeQuantities(this, level);
+        cudaManager->cudaFreeProbeQuantitiesAndOffsets(this, level);
 
     }
 }
@@ -250,13 +242,14 @@ void Probe::setProbePointsFromXNormalPlane(real pos_x, real pos0_y, real pos0_z,
     this->setProbePointsFromList(pointCoordsXtmp, pointCoordsYtmp, pointCoordsZtmp);
 }
 
-void Probe::addPostProcessingVariable(PostProcessingVariable _variable)
+void Probe::addPostProcessingVariable(PostProcessingVariable variable)
 {
-    this->postProcessingVariables.push_back(_variable);
-    switch(_variable)
+    this->quantities[int(variable)] = true;
+    switch(static_cast<PostProcessingVariable>(variable))
     {
         case PostProcessingVariable::Means: break;
-        case PostProcessingVariable::Variances: this->postProcessingVariables.push_back(PostProcessingVariable::Means); break;
+        case PostProcessingVariable::Variances: 
+            this->addPostProcessingVariable(PostProcessingVariable::Means); break;
         default: break;
     }
 }
@@ -351,16 +344,18 @@ void Probe::writeGridFile(Parameter* para, int level, std::vector<std::string>&
         for( auto it=nodedata.begin(); it!=nodedata.end(); it++) it->resize(sizeOfNodes);
 
         //TODO maybe change order of loops, could be faster, maybe not important, also still very ugly
-        for (unsigned int pos = startpos; pos < endpos; pos++)
+        for (uint pos = startpos; pos < endpos; pos++)
         {
             int dn = pos-startpos;
             //////////////////////////////////////////////////////////////////////////
             int offset = 0;
-            for(PostProcessingVariable variable: this->postProcessingVariables)
+            for( int var=0; var < int(PostProcessingVariable::LAST); var++)
             {
-                int arrOffset = this->getProbeStruct(level)->arrayOffsetsH[int(variable)]*this->getProbeStruct(level)->nPoints;
-                switch(variable)
+                if(this->quantities[var])
                 {
+                    int arrOffset = this->getProbeStruct(level)->arrayOffsetsH[var]*this->getProbeStruct(level)->nPoints;
+                    switch(static_cast<PostProcessingVariable>(var))
+                    {
                     case PostProcessingVariable::Means:
                     {
                         nodedata[offset][dn] = (double)this->getProbeStruct(level)->quantitiesArrayH[arrOffset+pos]*para->getVelocityRatio()*inv_t; arrOffset+=probeParams[level]->nPoints; offset++;
@@ -377,7 +372,9 @@ void Probe::writeGridFile(Parameter* para, int level, std::vector<std::string>&
                         nodedata[offset][dn] = double(this->getProbeStruct(level)->quantitiesArrayH[arrOffset+pos]*inv_t - pow(this->getProbeStruct(level)->quantitiesArrayH[arrOffset+meansShift+pos]*inv_t,2))*pow(para->getVelocityRatio(),2); arrOffset+=probeParams[level]->nPoints; offset++;
                     } break;
                     default: break;
+                    }
                 }
+
             }
         }
         WbWriterVtkXmlBinary::getInstance()->writeNodesWithNodeData(fnames[part], nodes, nodedatanames, nodedata);
@@ -387,24 +384,20 @@ void Probe::writeGridFile(Parameter* para, int level, std::vector<std::string>&
 std::vector<std::string> Probe::getVarNames()
 {
     std::vector<std::string> varNames;
-    for(PostProcessingVariable variable: this->postProcessingVariables)
+    if(this->quantities[int(PostProcessingVariable::Means)])
     {
-        switch(variable)
-        {
-            case PostProcessingVariable::Means:
-                varNames.push_back("vx_mean");
-                varNames.push_back("vy_mean");
-                varNames.push_back("vz_mean");
-                varNames.push_back("rho_mean");
-                break;            
-            case PostProcessingVariable::Variances:
-                varNames.push_back("vx_var");
-                varNames.push_back("vy_var");
-                varNames.push_back("vz_var");
-                varNames.push_back("rho_var");
-                break;
-            default: break;
-        }
+        varNames.push_back("vx_mean");
+        varNames.push_back("vy_mean");
+        varNames.push_back("vz_mean");
+        varNames.push_back("rho_mean");
+    }
+
+    if(this->quantities[int(PostProcessingVariable::Variances)])
+    {
+        varNames.push_back("vx_var");
+        varNames.push_back("vy_var");
+        varNames.push_back("vz_var");
+        varNames.push_back("rho_var");
     }
     return varNames;
 }
diff --git a/src/gpu/VirtualFluids_GPU/Visitor/Probe.h b/src/gpu/VirtualFluids_GPU/Visitor/Probe.h
index 5f5c7ea82..7cd98e02f 100644
--- a/src/gpu/VirtualFluids_GPU/Visitor/Probe.h
+++ b/src/gpu/VirtualFluids_GPU/Visitor/Probe.h
@@ -13,17 +13,17 @@ enum class PostProcessingVariable{
     // In init add number of arrays + offset in switch statement
     // If new quantity depends on other quantities i.e. mean, catch in addPostProcessingVariable
     Means = 0,
-    Variances = 4,
-    LAST = 8,
+    Variances = 1,
+    LAST = 2,
 };
 struct ProbeStruct{
     uint nPoints, nArrays;
     uint *pointIndicesH, *pointIndicesD;
     real *pointCoordsX, *pointCoordsY, *pointCoordsZ;
     real *distXH, *distYH, *distZH, *distXD, *distYD, *distZD;
-    real* quantitiesArrayH, *quantitiesArrayD;
-    PostProcessingVariable* quantitiesH,* quantitiesD; 
-    uint* arrayOffsetsH, *arrayOffsetsD;
+    real *quantitiesArrayH, *quantitiesArrayD;
+    bool *quantitiesH, *quantitiesD;
+    uint *arrayOffsetsH, *arrayOffsetsD;
 };
 
 
@@ -48,7 +48,6 @@ public:
     void free(Parameter* para, CudaMemoryManager* cudaManager);
 
     ProbeStruct* getProbeStruct(int level){ return this->probeParams[level]; }
-    std::vector<PostProcessingVariable> getPostProcessingVariables(){return this->postProcessingVariables; }
 
     void setProbePointsFromList(std::vector<real> &_pointCoordsX, std::vector<real> &_pointCoordsY, std::vector<real> &_pointCoordsZ);
     void setProbePointsFromXNormalPlane(real pos_x, real pos0_y, real pos0_z, real pos1_y, real pos1_z, real delta_y, real delta_z);
@@ -66,15 +65,12 @@ private:
     uint nProbePoints;
 
     std::vector<ProbeStruct*> probeParams;
-    std::vector<PostProcessingVariable> postProcessingVariables;
+    bool quantities[int(PostProcessingVariable::LAST)];
     std::vector<std::string> fileNamesForCollectionFile;
     std::vector<std::string> varNames;
 
     uint tStart;
     uint tOut;
-    // int* pointIndicesH, *pointIndicesD;
-    // std::vector< std::vector<int> > probeIndices;
-    // std::vector< std::vector<real> > distX, distY, distZ;
 };
 
 #endif
\ No newline at end of file
-- 
GitLab