From 952aafde6a3dff5a28acda6a7e2bbe2f039bfcfe Mon Sep 17 00:00:00 2001 From: Anna Wellmann <a.wellmann@tu-braunschweig.de> Date: Tue, 14 Sep 2021 15:15:21 +0200 Subject: [PATCH] Split fine to coarse kernel into border and bulk --- apps/gpu/LBM/MusselOyster/MusselOyster.cpp | 6 +-- src/gpu/GridGenerator/grid/Grid.h | 1 + .../grid/GridBuilder/GridBuilder.h | 1 + .../grid/GridBuilder/LevelGridBuilder.cpp | 5 ++ .../grid/GridBuilder/LevelGridBuilder.h | 1 + src/gpu/GridGenerator/grid/GridImp.cu | 29 +++++++++++ src/gpu/GridGenerator/grid/GridImp.h | 1 + .../Calculation/UpdateGrid27.cpp | 48 +++++++++---------- .../Calculation/UpdateGrid27.h | 4 +- .../GridReaderGenerator/GridGenerator.cpp | 33 +++++++++++++ src/gpu/VirtualFluids_GPU/LBM/LB.h | 5 ++ .../VirtualFluids_GPU/Parameter/Parameter.h | 3 ++ 12 files changed, 105 insertions(+), 32 deletions(-) diff --git a/apps/gpu/LBM/MusselOyster/MusselOyster.cpp b/apps/gpu/LBM/MusselOyster/MusselOyster.cpp index 79d6637b3..f57b20775 100644 --- a/apps/gpu/LBM/MusselOyster/MusselOyster.cpp +++ b/apps/gpu/LBM/MusselOyster/MusselOyster.cpp @@ -110,7 +110,7 @@ void multipleLevel(const std::string& configPath) bool useGridGenerator = true; bool useMultiGPU = false; - bool useStreams = false; + bool useStreams = true; bool useLevels = true; //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// @@ -118,7 +118,7 @@ void multipleLevel(const std::string& configPath) std::string bivalveType = "MUSSEL"; // "MUSSEL" "OYSTER" std::string gridPath(gridPathParent + bivalveType); // only for GridGenerator, for GridReader the gridPath needs to be set in the config file - real dxGrid = (real)2.0; + real dxGrid = (real)1.0; real vxLB = (real)0.051; // LB units real Re = (real)300.0; real viscosityLB = (vxLB * dxGrid) / Re; @@ -139,7 +139,7 @@ void multipleLevel(const std::string& configPath) para->setTOut(1000); - para->setTEnd(1000); + para->setTEnd(10000); para->setCalcDragLift(false); para->setUseWale(false); diff --git a/src/gpu/GridGenerator/grid/Grid.h b/src/gpu/GridGenerator/grid/Grid.h index 460b7a171..387a9a7b0 100644 --- a/src/gpu/GridGenerator/grid/Grid.h +++ b/src/gpu/GridGenerator/grid/Grid.h @@ -51,6 +51,7 @@ public: HOSTDEVICE virtual void setFieldEntry(uint matrixIndex, char type) = 0; CUDA_HOST virtual void getGridInterfaceIndices(uint* iCellCfc, uint* iCellCff, uint* iCellFcc, uint* iCellFcf) const = 0; + CUDA_HOST virtual void getGridInterfaceIndicesFCCBorderBulk(uint *iCellFccBorder, uint &intFCBorderKfc, uint *&iCellFccBulk, uint &intFCBulkKfc, int level) const = 0; CUDA_HOST virtual int *getNeighborsX() const = 0; CUDA_HOST virtual int *getNeighborsY() const = 0; diff --git a/src/gpu/GridGenerator/grid/GridBuilder/GridBuilder.h b/src/gpu/GridGenerator/grid/GridBuilder/GridBuilder.h index 9ff99fad4..ebe8ac8f9 100644 --- a/src/gpu/GridGenerator/grid/GridBuilder/GridBuilder.h +++ b/src/gpu/GridGenerator/grid/GridBuilder/GridBuilder.h @@ -56,6 +56,7 @@ public: virtual uint getNumberOfNodesCF(int level) = 0; virtual uint getNumberOfNodesFC(int level) = 0; virtual void getGridInterfaceIndices(uint* iCellCfc, uint* iCellCff, uint* iCellFcc, uint* iCellFcf, int level) const = 0; + virtual void getGridInterfaceIndicesFCCBorderBulk(uint *iCellFccBorder, uint &intFCBorderKfc, uint *&iCellFccBulk, uint &intFCBulkKfc, int level) const = 0; virtual void getOffsetFC(real* xOffCf, real* yOffCf, real* zOffCf, int level) = 0; virtual void getOffsetCF(real* xOffFc, real* yOffFc, real* zOffFc, int level) = 0; diff --git a/src/gpu/GridGenerator/grid/GridBuilder/LevelGridBuilder.cpp b/src/gpu/GridGenerator/grid/GridBuilder/LevelGridBuilder.cpp index ba2555654..fa0166a04 100644 --- a/src/gpu/GridGenerator/grid/GridBuilder/LevelGridBuilder.cpp +++ b/src/gpu/GridGenerator/grid/GridBuilder/LevelGridBuilder.cpp @@ -206,6 +206,11 @@ void LevelGridBuilder::getGridInterfaceIndices(uint* iCellCfc, uint* iCellCff, u this->grids[level]->getGridInterfaceIndices(iCellCfc, iCellCff, iCellFcc, iCellFcf); } +void LevelGridBuilder::getGridInterfaceIndicesFCCBorderBulk(uint *iCellFccBorder, uint &intFCBorderKfc, uint *&iCellFccBulk, uint &intFCBulkKfc, int level) const +{ + this->grids[level]->getGridInterfaceIndicesFCCBorderBulk(iCellFccBorder, intFCBorderKfc, iCellFccBulk, intFCBulkKfc, level); +} + void LevelGridBuilder::getOffsetFC(real * xOffFC, real * yOffFC, real * zOffFC, int level) { for (uint i = 0; i < getNumberOfNodesFC(level); i++) diff --git a/src/gpu/GridGenerator/grid/GridBuilder/LevelGridBuilder.h b/src/gpu/GridGenerator/grid/GridBuilder/LevelGridBuilder.h index 4cbb927a2..542df12d1 100644 --- a/src/gpu/GridGenerator/grid/GridBuilder/LevelGridBuilder.h +++ b/src/gpu/GridGenerator/grid/GridBuilder/LevelGridBuilder.h @@ -137,6 +137,7 @@ public: GRIDGENERATOR_EXPORT uint getNumberOfNodesFC(int level) override; GRIDGENERATOR_EXPORT void getGridInterfaceIndices(uint* iCellCfc, uint* iCellCff, uint* iCellFcc, uint* iCellFcf, int level) const override; + GRIDGENERATOR_EXPORT void getGridInterfaceIndicesFCCBorderBulk(uint *iCellFccBorder, uint &intFCBorderKfc, uint *&iCellFccBulk, uint &intFCBulkKfc, int level) const override; GRIDGENERATOR_EXPORT void getOffsetFC(real* xOffCf, real* yOffCf, real* zOffCf, int level) override; GRIDGENERATOR_EXPORT void getOffsetCF(real* xOffFc, real* yOffFc, real* zOffFc, int level) override; diff --git a/src/gpu/GridGenerator/grid/GridImp.cu b/src/gpu/GridGenerator/grid/GridImp.cu index 9a6c88e58..5b9e0da36 100644 --- a/src/gpu/GridGenerator/grid/GridImp.cu +++ b/src/gpu/GridGenerator/grid/GridImp.cu @@ -1943,6 +1943,35 @@ void GridImp::getGridInterface(uint* gridInterfaceList, const uint* oldGridInter gridInterfaceList[i] = oldGridInterfaceList[i] + 1; // + 1 for numbering shift between GridGenerator and VF_GPU } +void GridImp::getGridInterfaceIndicesFCCBorderBulk(uint *iCellFccBorder, uint &intFCBorderKfc, uint *&iCellFccBulk, + uint &intFCBulkKfc, int level) const +{ + // reorder the array of FCC indices and return pointers and sizes of the new subarrays + + uint *iCellFccAll = iCellFccBorder; + uint intFCKfcAll = this->gridInterface->fc.numberOfEntries; + std::vector<uint> iCellFccBorderVector; + std::vector<uint> iCellFccBulkVector; + + for (uint i = 0; i < intFCKfcAll; i++) + if (std::find(this->fluidNodeIndicesBorder.begin(), this->fluidNodeIndicesBorder.end(), iCellFccAll[i]) != + this->fluidNodeIndicesBorder.end()) + iCellFccBorderVector.push_back(iCellFccAll[i]); + else + iCellFccBulkVector.push_back(iCellFccAll[i]); + + intFCBorderKfc = (uint)iCellFccBorderVector.size(); + intFCBulkKfc = (uint)iCellFccBulkVector.size(); + iCellFccBulk = iCellFccBorder + intFCBorderKfc; + + for (uint i = 0; i < (uint)iCellFccBorderVector.size(); i++) + iCellFccBorder[i] = iCellFccBorderVector[i]; + for (uint i = 0; i < (uint)iCellFccBulkVector.size(); i++) + iCellFccBulk[i] = iCellFccBulkVector[i]; + std::cout << iCellFccBulk[0] << std::endl; + std::cout << iCellFccBulk[iCellFccBulkVector.size()-1] << std::endl; +} + #define GEOFLUID 19 #define GEOSOLID 16 diff --git a/src/gpu/GridGenerator/grid/GridImp.h b/src/gpu/GridGenerator/grid/GridImp.h index 1e74e9521..25d78f895 100644 --- a/src/gpu/GridGenerator/grid/GridImp.h +++ b/src/gpu/GridGenerator/grid/GridImp.h @@ -219,6 +219,7 @@ public: HOSTDEVICE uint getNumberOfNodesFC() const override; CUDA_HOST void getGridInterfaceIndices(uint* iCellCfc, uint* iCellCff, uint* iCellFcc, uint* iCellFcf) const override; CUDA_HOST static void getGridInterface(uint* gridInterfaceList, const uint* oldGridInterfaceList, uint size); + CUDA_HOST void getGridInterfaceIndicesFCCBorderBulk(uint *iCellFccBorder, uint &intFCBorderKfc, uint *&iCellFccBulk, uint &intFCBulkKfc, int level) const override; int* getNeighborsX() const override; int* getNeighborsY() const override; diff --git a/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.cpp b/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.cpp index 18aab89a7..d28f7f4e4 100644 --- a/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.cpp +++ b/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.cpp @@ -75,21 +75,17 @@ void updateGrid27(Parameter *para, vf::gpu::Communicator *comm, CudaMemoryManage { if (para->getUseStreams() && para->getNumprocs() > 1) { } else { - if (para->getKernelNeedsFluidNodeIndicesToRun()) { - fineToCoarseUsingIndex(para, level, -1); + //fineToCoarse(para, level); - prepareExchangeMultiGPU(para, level, -1); - exchangeMultiGPU(para, comm, cudaManager, level, -1); + fineToCoarseWithStream(para, level, para->getParD(level)->intFCBorder.ICellFCC, + para->getParD(level)->intFCBorder.kFC, -1); + fineToCoarseWithStream(para, level, para->getParD(level)->intFCBulk.ICellFCC, + para->getParD(level)->intFCBulk.kFC, -1); - coarseToFine(para, level); - } else { - fineToCoarse(para, level); + prepareExchangeMultiGPU(para, level, -1); + exchangeMultiGPU(para, comm, cudaManager, level, -1); - prepareExchangeMultiGPU(para, level, -1); - exchangeMultiGPU(para, comm, cudaManager, level, -1); - - coarseToFine(para, level); - } + coarseToFine(para, level); } } } @@ -110,8 +106,7 @@ void collision(Parameter* para, std::vector<std::shared_ptr<PorousMedia>>& pm, i } void collisionUsingIndex(Parameter *para, std::vector<std::shared_ptr<PorousMedia>> &pm, int level, unsigned int t, - std::vector<SPtr<Kernel>> &kernels, - uint *fluidNodeIndices, uint numberOfFluidNodes, int stream) + std::vector<SPtr<Kernel>> &kernels, uint *fluidNodeIndices, uint numberOfFluidNodes, int stream) { if (fluidNodeIndices != nullptr && numberOfFluidNodes != 0) kernels.at(level)->runOnIndices(fluidNodeIndices, numberOfFluidNodes, stream); @@ -1138,18 +1133,19 @@ void fineToCoarse(Parameter* para, int level) } -void fineToCoarseUsingIndex(Parameter *para, int level, int streamIndex) +void fineToCoarseWithStream(Parameter *para, int level, uint *iCellFCC, uint k_FC, int streamIndex) { cudaStream_t stream = (streamIndex == -1) ? CU_STREAM_LEGACY : para->getStreamManager()->getStream(streamIndex); - ScaleFC_RhoSq_comp_27_Stream( - para->getParD(level)->d0SP.f[0], para->getParD(level + 1)->d0SP.f[0], para->getParD(level)->neighborX_SP, - para->getParD(level)->neighborY_SP, para->getParD(level)->neighborZ_SP, para->getParD(level + 1)->neighborX_SP, - para->getParD(level + 1)->neighborY_SP, para->getParD(level + 1)->neighborZ_SP, - para->getParD(level)->size_Mat_SP, para->getParD(level + 1)->size_Mat_SP, para->getParD(level)->evenOrOdd, - para->getParD(level)->intFC.ICellFCC, para->getParD(level)->intFC.ICellFCF, para->getParD(level)->K_FC, - para->getParD(level)->omega, para->getParD(level + 1)->omega, para->getParD(level)->vis, - para->getParD(level)->nx, para->getParD(level)->ny, para->getParD(level + 1)->nx, para->getParD(level + 1)->ny, - para->getParD(level)->numberofthreads, para->getParD(level)->offFC, stream); + getLastCudaError("ScaleFC27_RhoSq_comp execution failed"); + ScaleFC_RhoSq_comp_27_Stream( para->getParD(level)->d0SP.f[0], para->getParD(level+1)->d0SP.f[0], + para->getParD(level)->neighborX_SP, para->getParD(level)->neighborY_SP, para->getParD(level)->neighborZ_SP, + para->getParD(level+1)->neighborX_SP, para->getParD(level+1)->neighborY_SP, para->getParD(level+1)->neighborZ_SP, + para->getParD(level)->size_Mat_SP, para->getParD(level+1)->size_Mat_SP, para->getParD(level)->evenOrOdd, + iCellFCC, para->getParD(level)->intFC.ICellFCF, + k_FC, para->getParD(level)->omega, para->getParD(level + 1)->omega, + para->getParD(level)->vis, para->getParD(level)->nx, para->getParD(level)->ny, + para->getParD(level+1)->nx, para->getParD(level+1)->ny, para->getParD(level)->numberofthreads, + para->getParD(level)->offFC, stream); getLastCudaError("ScaleFC27_RhoSq_comp_Stream execution failed"); ////////////////////////////////////////////////////////////////////////// @@ -1159,7 +1155,7 @@ void fineToCoarseUsingIndex(Parameter *para, int level, int streamIndex) if (para->getDiffOn()) { if (para->getDiffMod() == 7) { // TODO - printf("fineToCoarseUsingIndex Advection Diffusion not implemented"); + printf("fineToCoarseWithStream Advection Diffusion not implemented"); //ScaleFCThSMG7(para->getParD(level)->d0SP.f[0], para->getParD(level + 1)->d0SP.f[0], // para->getParD(level)->d7.f[0], para->getParD(level + 1)->d7.f[0], // para->getParD(level)->neighborX_SP, para->getParD(level)->neighborY_SP, @@ -1173,7 +1169,7 @@ void fineToCoarseUsingIndex(Parameter *para, int level, int streamIndex) //getLastCudaError("ScaleFCTh7 execution failed"); } else if (para->getDiffMod() == 27) { // TODO - printf("fineToCoarseUsingIndex Advection Diffusion not implemented"); + printf("fineToCoarseWithStream Advection Diffusion not implemented"); //ScaleFCThS27(para->getParD(level)->d0SP.f[0], para->getParD(level + 1)->d0SP.f[0], // para->getParD(level)->d27.f[0], para->getParD(level + 1)->d27.f[0], // para->getParD(level)->neighborX_SP, para->getParD(level)->neighborY_SP, diff --git a/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.h b/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.h index d80a3bf6c..d4eda6f6c 100644 --- a/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.h +++ b/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.h @@ -40,10 +40,8 @@ extern "C" void calcMacroscopicQuantities(Parameter* para, int level); extern "C" void preCollisionBC(Parameter* para, CudaMemoryManager* cudaManager, int level, unsigned int t); extern "C" void fineToCoarse(Parameter* para, int level); -extern "C" void fineToCoarseUsingIndex(Parameter *para, int level, int stream = -1); +extern "C" void fineToCoarseWithStream(Parameter *para, int level, uint *iCellFCC, uint k_FC, int streamIndex); extern "C" void coarseToFine(Parameter* para, int level); -extern "C" void coarseToFineUsingIndex(Parameter *para, int level, uint *fluidNodeIndices = nullptr, - uint numberOfFluidNodes = 0, int stream = -1); #endif diff --git a/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderGenerator/GridGenerator.cpp b/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderGenerator/GridGenerator.cpp index b96646142..0b71e98ae 100644 --- a/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderGenerator/GridGenerator.cpp +++ b/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderGenerator/GridGenerator.cpp @@ -920,6 +920,39 @@ void GridGenerator::allocArrays_OffsetScale() builder->getOffsetCF(para->getParH(level)->offCF.xOffCF, para->getParH(level)->offCF.yOffCF, para->getParH(level)->offCF.zOffCF, level); builder->getOffsetFC(para->getParH(level)->offFC.xOffFC, para->getParH(level)->offFC.yOffFC, para->getParH(level)->offFC.zOffFC, level); builder->getGridInterfaceIndices(para->getParH(level)->intCF.ICellCFC, para->getParH(level)->intCF.ICellCFF, para->getParH(level)->intFC.ICellFCC, para->getParH(level)->intFC.ICellFCF, level); + + if (para->getUseStreams()) { + // split fine-to-coarse-coarse indices into border and bulk + para->getParH(level)->intFCBorder.ICellFCC = para->getParH(level)->intFC.ICellFCC; + para->getParD(level)->intFCBorder.ICellFCC = para->getParD(level)->intFC.ICellFCC; + builder->getGridInterfaceIndicesFCCBorderBulk(para->getParH(level)->intFCBorder.ICellFCC, para->getParH(level)->intFCBorder.kFC, para->getParH(level)->intFCBulk.ICellFCC, para->getParH(level)->intFCBulk.kFC, level); + para->getParD(level)->intFCBorder.kFC = para->getParH(level)->intFCBorder.kFC; + para->getParD(level)->intFCBulk.kFC = para->getParH(level)->intFCBulk.kFC; + para->getParD(level)->intFCBulk.ICellFCC = para->getParD(level)->intFCBorder.ICellFCC + para->getParD(level)->intFCBulk.kFC; + } + std::cout << "sizeOld " << para->getParH(level)->K_FC << std::endl; + std::cout << "sizeNew " << para->getParH(level)->intFCBorder.kFC + para->getParH(level)->intFCBulk.kFC + << " = border " << para->getParH(level)->intFCBorder.kFC << " + bulk " + << para->getParH(level)->intFCBulk.kFC << std::endl; + std::cout << "first old " << para->getParH(level)->intFC.ICellFCC[0] << std::endl; + std::cout << "first new " << para->getParH(level)->intFCBorder.ICellFCC[0] + << std::endl; + //std::cout << "last border old " << para->getParH(level)->intFC.ICellFCC[para->getParH(level)->intFCBorder.kFC - 1] + // << std::endl; //if (para->getParH(level)->intFCBorder.kFC > 0) + //std::cout << "last border new " << para->getParH(level)->intFCBorder.ICellFCC[para->getParH(level)->intFCBorder.kFC - 1] + // << std::endl; + std::cout << "old pointer " << para->getParH(level)->intFC.ICellFCC << std::endl; + std::cout << "border pointer (= old pointer) " << para->getParH(level)->intFCBorder.ICellFCC << std::endl; + std::cout << "bulk pointer new " << para->getParH(level)->intFCBulk.ICellFCC << std::endl; + std::cout << "first bulk old " + << para->getParH(level)->intFC.ICellFCC[para->getParH(level)->intFCBorder.kFC] << std::endl; + std::cout << "first bulk new " + << para->getParH(level)->intFCBulk.ICellFCC[0] << std::endl; + std::cout << "last bulk old " + << para->getParH(level)->intFC.ICellFCC[para->getParH(level)->K_FC - 1] << std::endl; + std::cout << "last bulk new " + << para->getParH(level)->intFCBulk.ICellFCC[para->getParH(level)->intFCBulk.kFC - 1] << std::endl; + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// //copy cudaMemoryManager->cudaCopyInterfaceCF(level); diff --git a/src/gpu/VirtualFluids_GPU/LBM/LB.h b/src/gpu/VirtualFluids_GPU/LBM/LB.h index a33b3b792..a116485ac 100644 --- a/src/gpu/VirtualFluids_GPU/LBM/LB.h +++ b/src/gpu/VirtualFluids_GPU/LBM/LB.h @@ -140,6 +140,11 @@ typedef struct ICellFC{ uint kFC; } InterpolationCellFC; +typedef struct ICellFCBB { + uint *ICellFCC; + uint kFC; +} InterpolationCellFCBorderBulk; + //Offset of the interface cells at the wall typedef struct OffCF{ real* xOffCF; diff --git a/src/gpu/VirtualFluids_GPU/Parameter/Parameter.h b/src/gpu/VirtualFluids_GPU/Parameter/Parameter.h index 38013f5a6..b9bc179e3 100644 --- a/src/gpu/VirtualFluids_GPU/Parameter/Parameter.h +++ b/src/gpu/VirtualFluids_GPU/Parameter/Parameter.h @@ -181,6 +181,9 @@ struct LBMSimulationParameter unsigned int mem_size_kCF; unsigned int mem_size_kFC; + InterpolationCellFCBorderBulk intFCBorder; + InterpolationCellFCBorderBulk intFCBulk; + // offset////////////////// OffsetCF offCF; OffsetFC offFC; -- GitLab