diff --git a/src/gpu/GridGenerator/grid/Field.cpp b/src/gpu/GridGenerator/grid/Field.cpp index ef038b3d227ff08f1ef71579f3df4546c61d3e5d..86985af60e1ca25c247b586dbc2f356c665a8875 100644 --- a/src/gpu/GridGenerator/grid/Field.cpp +++ b/src/gpu/GridGenerator/grid/Field.cpp @@ -88,12 +88,6 @@ bool Field::isFluid(uint index) const return type == FLUID || type == FLUID_CFC || type == FLUID_CFF || type == FLUID_FCC || type == FLUID_FCF || isBoundaryConditionNode(index); } -bool Field::isFluidNodeOfSpecialInterest(uint index) const -{ - const char type = field[index]; - return isBoundaryConditionNode(index); -} - bool Field::isInvalidSolid(uint index) const { return field[index] == INVALID_SOLID; diff --git a/src/gpu/GridGenerator/grid/Field.h b/src/gpu/GridGenerator/grid/Field.h index 329b587fe2b7b644e4f4b2263f5196f1f758ad4e..08fff6da7c5a3f431138dc5039b4d234493ae4b8 100644 --- a/src/gpu/GridGenerator/grid/Field.h +++ b/src/gpu/GridGenerator/grid/Field.h @@ -52,7 +52,6 @@ public: bool isCoarseToFineNode(uint index) const; bool isFineToCoarseNode(uint index) const; bool isFluid(uint index) const; - bool isFluidNodeOfSpecialInterest(uint index) const; bool isInvalidSolid(uint index) const; bool isQ(uint index) const; bool isBoundaryConditionNode(uint index) const; diff --git a/src/gpu/GridGenerator/grid/GridImp.cpp b/src/gpu/GridGenerator/grid/GridImp.cpp index aa2714e492e82b58d50353d458a5889392d3e8f6..f6afafcd521245222c33972dcb46a9e9b2879826 100644 --- a/src/gpu/GridGenerator/grid/GridImp.cpp +++ b/src/gpu/GridGenerator/grid/GridImp.cpp @@ -1871,7 +1871,7 @@ uint GridImp::getSparseSize() const return this->sparseSize; } -HOSTDEVICE uint GridImp::getNumberOfFluidNodes() const { +uint GridImp::getNumberOfFluidNodes() const { return (uint)this->fluidNodeIndices.size(); } @@ -2065,7 +2065,7 @@ void GridImp::getNodeValues(real *xCoords, real *yCoords, real *zCoords, uint *n } } -CUDA_HOST void GridImp::getFluidNodeIndices(uint *fluidNodeIndices) const +void GridImp::getFluidNodeIndices(uint *fluidNodeIndices) const { for (uint nodeNumber = 0; nodeNumber < (uint)this->fluidNodeIndices.size(); nodeNumber++) fluidNodeIndices[nodeNumber] = this->fluidNodeIndices[nodeNumber]; diff --git a/src/gpu/GridGenerator/grid/GridImp.h b/src/gpu/GridGenerator/grid/GridImp.h index a2597b19539fd3eb7a2947cfab073918df112cea..ee30e2b4aaadd737e1fa096eec3b815768ddd0a0 100644 --- a/src/gpu/GridGenerator/grid/GridImp.h +++ b/src/gpu/GridGenerator/grid/GridImp.h @@ -70,7 +70,7 @@ extern int DIRECTIONS[DIR_END_MAX][DIMENSION]; class GRIDGENERATOR_EXPORT GridImp : public enableSharedFromThis<GridImp>, public Grid { -private: +protected: GridImp() = default; GridImp(Object* object, real startX, real startY, real startZ, real endX, real endY, real endZ, real delta, Distribution d, uint level); @@ -355,7 +355,7 @@ public: void findFluidNodeIndicesBorder() override; uint getNumberOfFluidNodes() const override; - CUDA_HOST void getFluidNodeIndices(uint *fluidNodeIndices) const override; + void getFluidNodeIndices(uint *fluidNodeIndices) const override; uint getNumberOfFluidNodesBorder() const override; void getFluidNodeIndicesBorder(uint *fluidNodeIndicesBorder) const override; diff --git a/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.cpp b/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.cpp index 318d6522fb49172c2619c5cbb6aa92751a1b759e..0cae176e48042b112480a8a718b4060a2e7396ca 100644 --- a/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.cpp +++ b/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.cpp @@ -37,7 +37,7 @@ 
void UpdateGrid27::updateGrid(int level, unsigned int t) calcMacroscopicQuantities(para.get(), level); if (para->getUseTurbulentViscosity()) - calcTurbulentViscosity(para, level); + calcTurbulentViscosity(para.get(), level); ////////////////////////////////////////////////////////////////////////// @@ -49,9 +49,9 @@ void UpdateGrid27::updateGrid(int level, unsigned int t) (this->*refinementAndExchange)(level); } - interactWithActuators(para, cudaManager, level, t); + interactWithActuators(para.get(), cudaManager.get(), level, t); - interactWithProbes(para, cudaManager, level, t); + interactWithProbes(para.get(), cudaManager.get(), level, t); } void UpdateGrid27::refinementAndExchange_noRefinementAndExchange(int level) {} @@ -304,7 +304,7 @@ void prepareExchangeMultiGPUAfterFtoC(Parameter *para, int level, int streamInde prepareExchangeCollDataZGPU27AfterFtoC(para, level, streamIndex); } -void exchangeMultiGPU(Parameter *para, vf::gpu::Communicator *comm, CudaMemoryManager *cudaManager, int level, +void exchangeMultiGPU(Parameter *para, vf::gpu::Communicator &comm, CudaMemoryManager *cudaManager, int level, int streamIndex) { ////////////////////////////////////////////////////////////////////////// @@ -335,7 +335,7 @@ void exchangeMultiGPU(Parameter *para, vf::gpu::Communicator *comm, CudaMemoryMa // 1D domain decomposition // exchangePostCollDataGPU27(para, comm, level); } -void exchangeMultiGPU_noStreams_withPrepare(Parameter *para, vf::gpu::Communicator *comm, CudaMemoryManager *cudaManager, int level, bool useReducedComm) +void exchangeMultiGPU_noStreams_withPrepare(Parameter *para, vf::gpu::Communicator &comm, CudaMemoryManager *cudaManager, int level, bool useReducedComm) { ////////////////////////////////////////////////////////////////////////// // 3D domain decomposition @@ -377,7 +377,7 @@ void exchangeMultiGPU_noStreams_withPrepare(Parameter *para, vf::gpu::Communicat exchangePostCollDataADZGPU27(para, comm, cudaManager, level); } } -void exchangeMultiGPUAfterFtoC(Parameter *para, vf::gpu::Communicator *comm, CudaMemoryManager *cudaManager, int level, +void exchangeMultiGPUAfterFtoC(Parameter *para, vf::gpu::Communicator &comm, CudaMemoryManager *cudaManager, int level, int streamIndex) { ////////////////////////////////////////////////////////////////////////// @@ -1562,7 +1562,7 @@ void coarseToFineWithStream(Parameter *para, int level, uint *iCellCFC, uint *iC } -UpdateGrid27::UpdateGrid27(SPtr<Parameter> para, vf::gpu::Communicator *comm, SPtr<CudaMemoryManager> cudaManager, +UpdateGrid27::UpdateGrid27(SPtr<Parameter> para, vf::gpu::Communicator &comm, SPtr<CudaMemoryManager> cudaManager, std::vector<std::shared_ptr<PorousMedia>> &pm, std::vector<SPtr<Kernel>> &kernels) : para(para), comm(comm), cudaManager(cudaManager), pm(pm), kernels(kernels) { diff --git a/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.h b/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.h index ec96c905bce7bd99a5fda8f75a2303bc84362e16..2da3c7cb832bbd87f98e846f020ab1f02cb6b6fd 100644 --- a/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.h +++ b/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.h @@ -13,7 +13,7 @@ class Kernel; class UpdateGrid27 { public: - UpdateGrid27(SPtr<Parameter> para, vf::gpu::Communicator *comm, SPtr<CudaMemoryManager> cudaManager, + UpdateGrid27(SPtr<Parameter> para, vf::gpu::Communicator &comm, SPtr<CudaMemoryManager> cudaManager, std::vector<std::shared_ptr<PorousMedia>> &pm, std::vector<SPtr<Kernel>> &kernels); void updateGrid(int level, unsigned int t); @@ -41,7 
+41,7 @@ private: SPtr<Parameter> para; - vf::gpu::Communicator *comm; + vf::gpu::Communicator& comm; SPtr<CudaMemoryManager> cudaManager; std::vector<std::shared_ptr<PorousMedia>> pm; std::vector<SPtr<Kernel>> kernels; @@ -60,11 +60,11 @@ extern "C" void collisionAdvectionDiffusion(Parameter* para, int level); extern "C" void prepareExchangeMultiGPU(Parameter *para, int level, int streamIndex); extern "C" void prepareExchangeMultiGPUAfterFtoC(Parameter *para, int level, int streamIndex); -extern "C" void exchangeMultiGPU(Parameter *para, vf::gpu::Communicator *comm, CudaMemoryManager *cudaManager, +extern "C" void exchangeMultiGPU(Parameter *para, vf::gpu::Communicator &comm, CudaMemoryManager *cudaManager, int level, int streamIndex); -extern "C" void exchangeMultiGPUAfterFtoC(Parameter *para, vf::gpu::Communicator *comm, CudaMemoryManager *cudaManager, +extern "C" void exchangeMultiGPUAfterFtoC(Parameter *para, vf::gpu::Communicator &comm, CudaMemoryManager *cudaManager, int level, int streamIndex); -extern "C" void exchangeMultiGPU_noStreams_withPrepare(Parameter *para, vf::gpu::Communicator *comm, +extern "C" void exchangeMultiGPU_noStreams_withPrepare(Parameter *para, vf::gpu::Communicator &comm, CudaMemoryManager *cudaManager, int level, bool useReducedComm); extern "C" void postCollisionBC(Parameter* para, int level, unsigned int t); diff --git a/src/gpu/VirtualFluids_GPU/Communication/ExchangeData27.cpp b/src/gpu/VirtualFluids_GPU/Communication/ExchangeData27.cpp index b6ce0071f9254ecabde0b05bbdcf4b37c368c11f..60c45c3dcb3805da61d3a4a78ff43323b329339b 100644 --- a/src/gpu/VirtualFluids_GPU/Communication/ExchangeData27.cpp +++ b/src/gpu/VirtualFluids_GPU/Communication/ExchangeData27.cpp @@ -49,21 +49,21 @@ void scatterNodesFromRecvBufferGPU(Parameter *para, int level, int streamIndex, } } -void startBlockingMpiSend(unsigned int numberOfSendProcessNeighbors, vf::gpu::Communicator *comm, +void startBlockingMpiSend(unsigned int numberOfSendProcessNeighbors, vf::gpu::Communicator &comm, std::vector<ProcessNeighbor27> *sendProcessNeighborHost) { for (unsigned int i = 0; i < numberOfSendProcessNeighbors; i++) { - comm->sendDataGPU((*sendProcessNeighborHost)[i].f[0], + comm.sendDataGPU((*sendProcessNeighborHost)[i].f[0], (*sendProcessNeighborHost)[i].numberOfFs, (*sendProcessNeighborHost)[i].rankNeighbor); } } -void startNonBlockingMpiReceive(unsigned int numberOfSendProcessNeighbors, vf::gpu::Communicator *comm, +void startNonBlockingMpiReceive(unsigned int numberOfSendProcessNeighbors, vf::gpu::Communicator &comm, std::vector<ProcessNeighbor27> *recvProcessNeighborHost) { for (unsigned int i = 0; i < numberOfSendProcessNeighbors; i++) { - comm->nbRecvDataGPU((*recvProcessNeighborHost)[i].f[0], + comm.nbRecvDataGPU((*recvProcessNeighborHost)[i].f[0], (*recvProcessNeighborHost)[i].numberOfFs, (*recvProcessNeighborHost)[i].rankNeighbor); } @@ -114,7 +114,7 @@ void prepareExchangeCollDataXGPU27AfterFtoC(Parameter *para, int level, int stre (unsigned int)(para->getNumberOfProcessNeighborsX(level, "send"))); } -void exchangeCollDataXGPU27AllNodes(Parameter *para, vf::gpu::Communicator *comm, CudaMemoryManager *cudaManager, +void exchangeCollDataXGPU27AllNodes(Parameter *para, vf::gpu::Communicator &comm, CudaMemoryManager *cudaManager, int level, int streamIndex) { exchangeCollDataXGPU27(para, comm, cudaManager, level, streamIndex, @@ -124,7 +124,7 @@ void exchangeCollDataXGPU27AllNodes(Parameter *para, vf::gpu::Communicator *comm &para->getParH(level)->recvProcessNeighborX); } -void 
exchangeCollDataXGPU27AfterFtoC(Parameter *para, vf::gpu::Communicator *comm, CudaMemoryManager *cudaManager, +void exchangeCollDataXGPU27AfterFtoC(Parameter *para, vf::gpu::Communicator &comm, CudaMemoryManager *cudaManager, int level, int streamIndex) { exchangeCollDataXGPU27(para, comm, cudaManager, level, streamIndex, @@ -146,7 +146,7 @@ void scatterNodesFromRecvBufferXGPU27AfterFtoC(Parameter *para, int level, int s (unsigned int)(para->getNumberOfProcessNeighborsX(level, "send"))); } -void exchangeCollDataXGPU27(Parameter *para, vf::gpu::Communicator *comm, CudaMemoryManager *cudaManager, int level, +void exchangeCollDataXGPU27(Parameter *para, vf::gpu::Communicator &comm, CudaMemoryManager *cudaManager, int level, int streamIndex, std::vector<ProcessNeighbor27> *sendProcessNeighborDev, std::vector<ProcessNeighbor27> *recvProcessNeighborDev, @@ -172,10 +172,10 @@ void exchangeCollDataXGPU27(Parameter *para, vf::gpu::Communicator *comm, CudaMe startBlockingMpiSend((unsigned int)(*sendProcessNeighborHost).size(), comm, sendProcessNeighborHost); /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// //! 5. wait for until data is received - for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsX(level, "send")); i++) comm->waitGPU(i); + for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsX(level, "send")); i++) comm.waitGPU(i); /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// //! 6. reset the request array, which was used for the mpi communication - if (0 < (unsigned int)(para->getNumberOfProcessNeighborsX(level, "send"))) comm->resetRequest(); + if (0 < (unsigned int)(para->getNumberOfProcessNeighborsX(level, "send"))) comm.resetRequest(); /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// //! 7. 
copy received data from host to device for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsX(level, "send")); i++) @@ -199,7 +199,7 @@ void prepareExchangeCollDataYGPU27AfterFtoC(Parameter *para, int level, int stre (unsigned int)(para->getNumberOfProcessNeighborsY(level, "send"))); } -void exchangeCollDataYGPU27AllNodes(Parameter *para, vf::gpu::Communicator *comm, CudaMemoryManager *cudaManager, +void exchangeCollDataYGPU27AllNodes(Parameter *para, vf::gpu::Communicator &comm, CudaMemoryManager *cudaManager, int level, int streamIndex) { exchangeCollDataYGPU27(para, comm, cudaManager, level, streamIndex, @@ -209,7 +209,7 @@ void exchangeCollDataYGPU27AllNodes(Parameter *para, vf::gpu::Communicator *comm &para->getParH(level)->recvProcessNeighborY); } -void exchangeCollDataYGPU27AfterFtoC(Parameter *para, vf::gpu::Communicator *comm, CudaMemoryManager *cudaManager, +void exchangeCollDataYGPU27AfterFtoC(Parameter *para, vf::gpu::Communicator &comm, CudaMemoryManager *cudaManager, int level, int streamIndex) { exchangeCollDataYGPU27(para, comm, cudaManager, level, streamIndex, @@ -231,7 +231,7 @@ void scatterNodesFromRecvBufferYGPU27AfterFtoC(Parameter *para, int level, int s (unsigned int)(para->getNumberOfProcessNeighborsY(level, "send"))); } -void exchangeCollDataYGPU27(Parameter *para, vf::gpu::Communicator *comm, CudaMemoryManager *cudaManager, int level, +void exchangeCollDataYGPU27(Parameter *para, vf::gpu::Communicator &comm, CudaMemoryManager *cudaManager, int level, int streamIndex, std::vector<ProcessNeighbor27> *sendProcessNeighborDev, std::vector<ProcessNeighbor27> *recvProcessNeighborDev, std::vector<ProcessNeighbor27> *sendProcessNeighborHost, @@ -267,10 +267,10 @@ void exchangeCollDataYGPU27(Parameter *para, vf::gpu::Communicator *comm, CudaMe startBlockingMpiSend((unsigned int)(*sendProcessNeighborHost).size(), comm, sendProcessNeighborHost); /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// // wait - for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsY(level, "send")); i++) comm->waitGPU(i); + for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsY(level, "send")); i++) comm.waitGPU(i); /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// // reset the request array - if (0 < (unsigned int)(para->getNumberOfProcessNeighborsY(level, "send"))) comm->resetRequest(); + if (0 < (unsigned int)(para->getNumberOfProcessNeighborsY(level, "send"))) comm.resetRequest(); /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// // copy Host to Device for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsY(level, "send")); i++) { @@ -295,7 +295,7 @@ void prepareExchangeCollDataZGPU27AfterFtoC(Parameter *para, int level, int stre (unsigned int)(para->getNumberOfProcessNeighborsZ(level, "send"))); } -void exchangeCollDataZGPU27AllNodes(Parameter *para, vf::gpu::Communicator *comm, CudaMemoryManager *cudaManager, +void exchangeCollDataZGPU27AllNodes(Parameter *para, vf::gpu::Communicator &comm, CudaMemoryManager *cudaManager, int level, int streamIndex) { exchangeCollDataZGPU27(para, comm, cudaManager, level, streamIndex, @@ -304,7 +304,7 @@ void exchangeCollDataZGPU27AllNodes(Parameter *para, vf::gpu::Communicator *comm &para->getParH(level)->sendProcessNeighborZ, 
&para->getParH(level)->recvProcessNeighborZ); } -void exchangeCollDataZGPU27AfterFtoC(Parameter *para, vf::gpu::Communicator *comm, CudaMemoryManager *cudaManager, +void exchangeCollDataZGPU27AfterFtoC(Parameter *para, vf::gpu::Communicator &comm, CudaMemoryManager *cudaManager, int level, int streamIndex) { exchangeCollDataZGPU27(para, comm, cudaManager, level, streamIndex, @@ -327,7 +327,7 @@ void scatterNodesFromRecvBufferZGPU27AfterFtoC(Parameter *para, int level, int s } //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -void exchangeCollDataZGPU27(Parameter *para, vf::gpu::Communicator *comm, CudaMemoryManager *cudaManager, int level, +void exchangeCollDataZGPU27(Parameter *para, vf::gpu::Communicator &comm, CudaMemoryManager *cudaManager, int level, int streamIndex, std::vector<ProcessNeighbor27> *sendProcessNeighborDev, std::vector<ProcessNeighbor27> *recvProcessNeighborDev, std::vector<ProcessNeighbor27> *sendProcessNeighborHost, @@ -376,10 +376,10 @@ void exchangeCollDataZGPU27(Parameter *para, vf::gpu::Communicator *comm, CudaMe startBlockingMpiSend((unsigned int)(*sendProcessNeighborHost).size(), comm, sendProcessNeighborHost); /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// // wait - for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsZ(level, "send")); i++) comm->waitGPU(i); + for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsZ(level, "send")); i++) comm.waitGPU(i); /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// // reset the request array - if (0 < (unsigned int)(para->getNumberOfProcessNeighborsZ(level, "send"))) comm->resetRequest(); + if (0 < (unsigned int)(para->getNumberOfProcessNeighborsZ(level, "send"))) comm.resetRequest(); /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// // copy Host to Device for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsZ(level, "send")); i++) @@ -430,7 +430,7 @@ void exchangePreCollDataGPU27(Parameter* para, vf::gpu::Communicator& comm, Cuda ////////////////////////////////////////////////////////////////////////// cudaManager->cudaCopyProcessNeighborFsDH(level, i); ////////////////////////////////////////////////////////////////////////// - comm->exchngDataGPU(para->getParH(level)->sendProcessNeighbor[i].f[0], + comm.exchngDataGPU(para->getParH(level)->sendProcessNeighbor[i].f[0], para->getParH(level)->sendProcessNeighbor[i].numberOfFs, para->getParH(level)->recvProcessNeighbor[i].f[0], para->getParH(level)->recvProcessNeighbor[i].numberOfFs, @@ -475,7 +475,7 @@ void exchangePostCollDataGPU27(Parameter* para, vf::gpu::Communicator& comm, Cud ////////////////////////////////////////////////////////////////////////// cudaManager->cudaCopyProcessNeighborFsDH(level, i); ////////////////////////////////////////////////////////////////////////// - comm->exchngDataGPU(para->getParH(level)->sendProcessNeighbor[i].f[0], + comm.exchngDataGPU(para->getParH(level)->sendProcessNeighbor[i].f[0], para->getParH(level)->sendProcessNeighbor[i].numberOfFs, para->getParH(level)->recvProcessNeighbor[i].f[0], para->getParH(level)->recvProcessNeighbor[i].numberOfFs, @@ -516,7 +516,7 @@ void exchangePostCollDataGPU27(Parameter* para, vf::gpu::Communicator& comm, Cud // 
////////////////////////////////////////////////////////////////////////// // para->cudaCopyProcessNeighborXFsDH(level, i); // ////////////////////////////////////////////////////////////////////////// -// comm->exchngDataGPU(para->getParH(level)->sendProcessNeighborX[i].f[0], +// comm.exchngDataGPU(para->getParH(level)->sendProcessNeighborX[i].f[0], // para->getParH(level)->sendProcessNeighborX[i].numberOfFs, // para->getParH(level)->recvProcessNeighborX[i].f[0], // para->getParH(level)->recvProcessNeighborX[i].numberOfFs, @@ -563,7 +563,7 @@ void exchangePostCollDataGPU27(Parameter* para, vf::gpu::Communicator& comm, Cud // ////////////////////////////////////////////////////////////////////////// // para->cudaCopyProcessNeighborYFsDH(level, i); // ////////////////////////////////////////////////////////////////////////// -// comm->exchngDataGPU(para->getParH(level)->sendProcessNeighborY[i].f[0], +// comm.exchngDataGPU(para->getParH(level)->sendProcessNeighborY[i].f[0], // para->getParH(level)->sendProcessNeighborY[i].numberOfFs, // para->getParH(level)->recvProcessNeighborY[i].f[0], // para->getParH(level)->recvProcessNeighborY[i].numberOfFs, @@ -603,7 +603,7 @@ void exchangePostCollDataGPU27(Parameter* para, vf::gpu::Communicator& comm, Cud // ////////////////////////////////////////////////////////////////////////// // para->cudaCopyProcessNeighborYFsDH(level, i); // ////////////////////////////////////////////////////////////////////////// -// comm->exchngDataGPU(para->getParH(level)->sendProcessNeighborY[i].f[0], +// comm.exchngDataGPU(para->getParH(level)->sendProcessNeighborY[i].f[0], // para->getParH(level)->sendProcessNeighborY[i].numberOfFs, // para->getParH(level)->recvProcessNeighborY[i].f[0], // para->getParH(level)->recvProcessNeighborY[i].numberOfFs, @@ -650,7 +650,7 @@ void exchangePostCollDataGPU27(Parameter* para, vf::gpu::Communicator& comm, Cud // ////////////////////////////////////////////////////////////////////////// // para->cudaCopyProcessNeighborZFsDH(level, i); // ////////////////////////////////////////////////////////////////////////// -// comm->exchngDataGPU(para->getParH(level)->sendProcessNeighborZ[i].f[0], +// comm.exchngDataGPU(para->getParH(level)->sendProcessNeighborZ[i].f[0], // para->getParH(level)->sendProcessNeighborZ[i].numberOfFs, // para->getParH(level)->recvProcessNeighborZ[i].f[0], // para->getParH(level)->recvProcessNeighborZ[i].numberOfFs, @@ -690,7 +690,7 @@ void exchangePostCollDataGPU27(Parameter* para, vf::gpu::Communicator& comm, Cud // ////////////////////////////////////////////////////////////////////////// // para->cudaCopyProcessNeighborZFsDH(level, i); // ////////////////////////////////////////////////////////////////////////// -// comm->exchngDataGPU(para->getParH(level)->sendProcessNeighborZ[i].f[0], +// comm.exchngDataGPU(para->getParH(level)->sendProcessNeighborZ[i].f[0], // para->getParH(level)->sendProcessNeighborZ[i].numberOfFs, // para->getParH(level)->recvProcessNeighborZ[i].f[0], // para->getParH(level)->recvProcessNeighborZ[i].numberOfFs, @@ -790,7 +790,7 @@ void exchangePreCollDataADXGPU27(Parameter* para, vf::gpu::Communicator& comm, C //start non blocking MPI receive for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsX(level, "send")); i++) { - comm->nbRecvDataGPU(para->getParH(level)->recvProcessNeighborADX[i].f[0], + comm.nbRecvDataGPU(para->getParH(level)->recvProcessNeighborADX[i].f[0], para->getParH(level)->recvProcessNeighborADX[i].numberOfFs, 
para->getParH(level)->recvProcessNeighborADX[i].rankNeighbor); } @@ -798,7 +798,7 @@ void exchangePreCollDataADXGPU27(Parameter* para, vf::gpu::Communicator& comm, C ////start non blocking MPI send //for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsX(level, "send")); i++) //{ - // comm->nbSendDataGPU(para->getParH(level)->sendProcessNeighborADX[i].f[0], + // comm.nbSendDataGPU(para->getParH(level)->sendProcessNeighborADX[i].f[0], // para->getParH(level)->sendProcessNeighborADX[i].numberOfFs, // para->getParH(level)->sendProcessNeighborADX[i].rankNeighbor); //} @@ -806,13 +806,13 @@ void exchangePreCollDataADXGPU27(Parameter* para, vf::gpu::Communicator& comm, C ////Waitall //if (0 < (unsigned int)(para->getNumberOfProcessNeighborsX(level, "send"))) //{ - // comm->waitallGPU(); + // comm.waitallGPU(); //} ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// //start blocking MPI send for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsX(level, "send")); i++) { - comm->sendDataGPU(para->getParH(level)->sendProcessNeighborADX[i].f[0], + comm.sendDataGPU(para->getParH(level)->sendProcessNeighborADX[i].f[0], para->getParH(level)->sendProcessNeighborADX[i].numberOfFs, para->getParH(level)->sendProcessNeighborADX[i].rankNeighbor); } @@ -820,13 +820,13 @@ void exchangePreCollDataADXGPU27(Parameter* para, vf::gpu::Communicator& comm, C //Wait for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsX(level, "send")); i++) { - comm->waitGPU(i); + comm.waitGPU(i); } /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// //reset the request array if (0 < (unsigned int)(para->getNumberOfProcessNeighborsX(level, "send"))) { - comm->resetRequest(); + comm.resetRequest(); } /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// //copy Host to Device @@ -871,7 +871,7 @@ void exchangePostCollDataADXGPU27(Parameter* para, vf::gpu::Communicator& comm, //start non blocking MPI receive for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsX(level, "send")); i++) { - comm->nbRecvDataGPU(para->getParH(level)->recvProcessNeighborADX[i].f[0], + comm.nbRecvDataGPU(para->getParH(level)->recvProcessNeighborADX[i].f[0], para->getParH(level)->recvProcessNeighborADX[i].numberOfFs, para->getParH(level)->recvProcessNeighborADX[i].rankNeighbor); } @@ -879,7 +879,7 @@ void exchangePostCollDataADXGPU27(Parameter* para, vf::gpu::Communicator& comm, ////start non blocking MPI send //for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsX(level, "send")); i++) //{ - // comm->nbSendDataGPU(para->getParH(level)->sendProcessNeighborADX[i].f[0], + // comm.nbSendDataGPU(para->getParH(level)->sendProcessNeighborADX[i].f[0], // para->getParH(level)->sendProcessNeighborADX[i].numberOfFs, // para->getParH(level)->sendProcessNeighborADX[i].rankNeighbor); //} @@ -887,13 +887,13 @@ void exchangePostCollDataADXGPU27(Parameter* para, vf::gpu::Communicator& comm, ////Waitall //if (0 < (unsigned int)(para->getNumberOfProcessNeighborsX(level, "send"))) //{ - // comm->waitallGPU(); + // comm.waitallGPU(); //} ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// //start blocking MPI send for (unsigned int i = 0; i < (unsigned 
int)(para->getNumberOfProcessNeighborsX(level, "send")); i++) { - comm->sendDataGPU(para->getParH(level)->sendProcessNeighborADX[i].f[0], + comm.sendDataGPU(para->getParH(level)->sendProcessNeighborADX[i].f[0], para->getParH(level)->sendProcessNeighborADX[i].numberOfFs, para->getParH(level)->sendProcessNeighborADX[i].rankNeighbor); } @@ -901,13 +901,13 @@ void exchangePostCollDataADXGPU27(Parameter* para, vf::gpu::Communicator& comm, //Wait for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsX(level, "send")); i++) { - comm->waitGPU(i); + comm.waitGPU(i); } /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// //reset the request array if (0 < (unsigned int)(para->getNumberOfProcessNeighborsX(level, "send"))) { - comm->resetRequest(); + comm.resetRequest(); } /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// //copy Host to Device @@ -959,7 +959,7 @@ void exchangePreCollDataADYGPU27(Parameter* para, vf::gpu::Communicator& comm, C //start non blocking MPI receive for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsY(level, "send")); i++) { - comm->nbRecvDataGPU(para->getParH(level)->recvProcessNeighborADY[i].f[0], + comm.nbRecvDataGPU(para->getParH(level)->recvProcessNeighborADY[i].f[0], para->getParH(level)->recvProcessNeighborADY[i].numberOfFs, para->getParH(level)->recvProcessNeighborADY[i].rankNeighbor); } @@ -967,7 +967,7 @@ void exchangePreCollDataADYGPU27(Parameter* para, vf::gpu::Communicator& comm, C ////start non blocking MPI send //for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsY(level, "send")); i++) //{ - // comm->nbSendDataGPU(para->getParH(level)->sendProcessNeighborADY[i].f[0], + // comm.nbSendDataGPU(para->getParH(level)->sendProcessNeighborADY[i].f[0], // para->getParH(level)->sendProcessNeighborADY[i].numberOfFs, // para->getParH(level)->sendProcessNeighborADY[i].rankNeighbor); //} @@ -975,13 +975,13 @@ void exchangePreCollDataADYGPU27(Parameter* para, vf::gpu::Communicator& comm, C ////Waitall //if (0 < (unsigned int)(para->getNumberOfProcessNeighborsY(level, "send"))) //{ - // comm->waitallGPU(); + // comm.waitallGPU(); //} ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// //start blocking MPI send for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsY(level, "send")); i++) { - comm->sendDataGPU(para->getParH(level)->sendProcessNeighborADY[i].f[0], + comm.sendDataGPU(para->getParH(level)->sendProcessNeighborADY[i].f[0], para->getParH(level)->sendProcessNeighborADY[i].numberOfFs, para->getParH(level)->sendProcessNeighborADY[i].rankNeighbor); } @@ -989,13 +989,13 @@ void exchangePreCollDataADYGPU27(Parameter* para, vf::gpu::Communicator& comm, C //Wait for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsY(level, "send")); i++) { - comm->waitGPU(i); + comm.waitGPU(i); } /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// //reset the request array if (0 < (unsigned int)(para->getNumberOfProcessNeighborsY(level, "send"))) { - comm->resetRequest(); + comm.resetRequest(); } /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// //copy Host to Device @@ -1040,7 +1040,7 @@ void 
exchangePostCollDataADYGPU27(Parameter* para, vf::gpu::Communicator& comm, //start non blocking MPI receive for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsY(level, "send")); i++) { - comm->nbRecvDataGPU(para->getParH(level)->recvProcessNeighborADY[i].f[0], + comm.nbRecvDataGPU(para->getParH(level)->recvProcessNeighborADY[i].f[0], para->getParH(level)->recvProcessNeighborADY[i].numberOfFs, para->getParH(level)->recvProcessNeighborADY[i].rankNeighbor); } @@ -1048,7 +1048,7 @@ void exchangePostCollDataADYGPU27(Parameter* para, vf::gpu::Communicator& comm, ////start non blocking MPI send //for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsY(level, "send")); i++) //{ - // comm->nbSendDataGPU(para->getParH(level)->sendProcessNeighborADY[i].f[0], + // comm.nbSendDataGPU(para->getParH(level)->sendProcessNeighborADY[i].f[0], // para->getParH(level)->sendProcessNeighborADY[i].numberOfFs, // para->getParH(level)->sendProcessNeighborADY[i].rankNeighbor); //} @@ -1056,13 +1056,13 @@ void exchangePostCollDataADYGPU27(Parameter* para, vf::gpu::Communicator& comm, ////Waitall //if (0 < (unsigned int)(para->getNumberOfProcessNeighborsY(level, "send"))) //{ - // comm->waitallGPU(); + // comm.waitallGPU(); //} ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// //start blocking MPI send for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsY(level, "send")); i++) { - comm->sendDataGPU(para->getParH(level)->sendProcessNeighborADY[i].f[0], + comm.sendDataGPU(para->getParH(level)->sendProcessNeighborADY[i].f[0], para->getParH(level)->sendProcessNeighborADY[i].numberOfFs, para->getParH(level)->sendProcessNeighborADY[i].rankNeighbor); } @@ -1070,13 +1070,13 @@ void exchangePostCollDataADYGPU27(Parameter* para, vf::gpu::Communicator& comm, //Wait for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsY(level, "send")); i++) { - comm->waitGPU(i); + comm.waitGPU(i); } /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// //reset the request array if (0 < (unsigned int)(para->getNumberOfProcessNeighborsY(level, "send"))) { - comm->resetRequest(); + comm.resetRequest(); } /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// //copy Host to Device @@ -1128,7 +1128,7 @@ void exchangePreCollDataADZGPU27(Parameter* para, vf::gpu::Communicator& comm, C //start non blocking MPI receive for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsZ(level, "send")); i++) { - comm->nbRecvDataGPU(para->getParH(level)->recvProcessNeighborADZ[i].f[0], + comm.nbRecvDataGPU(para->getParH(level)->recvProcessNeighborADZ[i].f[0], para->getParH(level)->recvProcessNeighborADZ[i].numberOfFs, para->getParH(level)->recvProcessNeighborADZ[i].rankNeighbor); } @@ -1136,7 +1136,7 @@ void exchangePreCollDataADZGPU27(Parameter* para, vf::gpu::Communicator& comm, C ////start non blocking MPI send //for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsZ(level, "send")); i++) //{ - // comm->nbSendDataGPU(para->getParH(level)->sendProcessNeighborADZ[i].f[0], + // comm.nbSendDataGPU(para->getParH(level)->sendProcessNeighborADZ[i].f[0], // para->getParH(level)->sendProcessNeighborADZ[i].numberOfFs, // para->getParH(level)->sendProcessNeighborADZ[i].rankNeighbor); //} @@ -1144,13 +1144,13 @@ void 
exchangePreCollDataADZGPU27(Parameter* para, vf::gpu::Communicator& comm, C ////Waitall //if (0 < (unsigned int)(para->getNumberOfProcessNeighborsZ(level, "send"))) //{ - // comm->waitallGPU(); + // comm.waitallGPU(); //} ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// //start blocking MPI send for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsZ(level, "send")); i++) { - comm->sendDataGPU(para->getParH(level)->sendProcessNeighborADZ[i].f[0], + comm.sendDataGPU(para->getParH(level)->sendProcessNeighborADZ[i].f[0], para->getParH(level)->sendProcessNeighborADZ[i].numberOfFs, para->getParH(level)->sendProcessNeighborADZ[i].rankNeighbor); } @@ -1158,13 +1158,13 @@ void exchangePreCollDataADZGPU27(Parameter* para, vf::gpu::Communicator& comm, C //Wait for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsZ(level, "send")); i++) { - comm->waitGPU(i); + comm.waitGPU(i); } /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// //reset the request array if (0 < (unsigned int)(para->getNumberOfProcessNeighborsZ(level, "send"))) { - comm->resetRequest(); + comm.resetRequest(); } /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// //copy Host to Device @@ -1209,7 +1209,7 @@ void exchangePostCollDataADZGPU27(Parameter* para, vf::gpu::Communicator& comm, //start non blocking MPI receive for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsZ(level, "send")); i++) { - comm->nbRecvDataGPU(para->getParH(level)->recvProcessNeighborADZ[i].f[0], + comm.nbRecvDataGPU(para->getParH(level)->recvProcessNeighborADZ[i].f[0], para->getParH(level)->recvProcessNeighborADZ[i].numberOfFs, para->getParH(level)->recvProcessNeighborADZ[i].rankNeighbor); } @@ -1217,7 +1217,7 @@ void exchangePostCollDataADZGPU27(Parameter* para, vf::gpu::Communicator& comm, ////start non blocking MPI send //for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsZ(level, "send")); i++) //{ - // comm->nbSendDataGPU(para->getParH(level)->sendProcessNeighborADZ[i].f[0], + // comm.nbSendDataGPU(para->getParH(level)->sendProcessNeighborADZ[i].f[0], // para->getParH(level)->sendProcessNeighborADZ[i].numberOfFs, // para->getParH(level)->sendProcessNeighborADZ[i].rankNeighbor); //} @@ -1225,13 +1225,13 @@ void exchangePostCollDataADZGPU27(Parameter* para, vf::gpu::Communicator& comm, ////Waitall //if (0 < (unsigned int)(para->getNumberOfProcessNeighborsZ(level, "send"))) //{ - // comm->waitallGPU(); + // comm.waitallGPU(); //} ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// //start blocking MPI send for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsZ(level, "send")); i++) { - comm->sendDataGPU(para->getParH(level)->sendProcessNeighborADZ[i].f[0], + comm.sendDataGPU(para->getParH(level)->sendProcessNeighborADZ[i].f[0], para->getParH(level)->sendProcessNeighborADZ[i].numberOfFs, para->getParH(level)->sendProcessNeighborADZ[i].rankNeighbor); } @@ -1239,13 +1239,13 @@ void exchangePostCollDataADZGPU27(Parameter* para, vf::gpu::Communicator& comm, //Wait for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsZ(level, "send")); i++) { - comm->waitGPU(i); + comm.waitGPU(i); } 
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// //reset the request array if (0 < (unsigned int)(para->getNumberOfProcessNeighborsZ(level, "send"))) { - comm->resetRequest(); + comm.resetRequest(); } /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// //copy Host to Device @@ -1345,7 +1345,7 @@ void exchangeCollDataF3XGPU(Parameter* para, vf::gpu::Communicator& comm, CudaMe //start non blocking MPI receive for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsX(level, "send")); i++) { - comm->nbRecvDataGPU( + comm.nbRecvDataGPU( para->getParH(level)->recvProcessNeighborF3X[i].g[0], para->getParH(level)->recvProcessNeighborF3X[i].numberOfGs, para->getParH(level)->recvProcessNeighborF3X[i].rankNeighbor); @@ -1354,7 +1354,7 @@ void exchangeCollDataF3XGPU(Parameter* para, vf::gpu::Communicator& comm, CudaMe //start blocking MPI send for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsX(level, "send")); i++) { - comm->sendDataGPU( + comm.sendDataGPU( para->getParH(level)->sendProcessNeighborF3X[i].g[0], para->getParH(level)->sendProcessNeighborF3X[i].numberOfGs, para->getParH(level)->sendProcessNeighborF3X[i].rankNeighbor); @@ -1363,13 +1363,13 @@ void exchangeCollDataF3XGPU(Parameter* para, vf::gpu::Communicator& comm, CudaMe //Wait for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsX(level, "send")); i++) { - comm->waitGPU(i); + comm.waitGPU(i); } /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// //reset the request array if (0 < (unsigned int)(para->getNumberOfProcessNeighborsX(level, "send"))) { - comm->resetRequest(); + comm.resetRequest(); } /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// //copy Host to Device @@ -1423,7 +1423,7 @@ void exchangeCollDataF3YGPU(Parameter* para, vf::gpu::Communicator& comm, CudaMe //start non blocking MPI receive for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsY(level, "send")); i++) { - comm->nbRecvDataGPU( + comm.nbRecvDataGPU( para->getParH(level)->recvProcessNeighborF3Y[i].g[0], para->getParH(level)->recvProcessNeighborF3Y[i].numberOfGs, para->getParH(level)->recvProcessNeighborF3Y[i].rankNeighbor); @@ -1432,7 +1432,7 @@ void exchangeCollDataF3YGPU(Parameter* para, vf::gpu::Communicator& comm, CudaMe //start blocking MPI send for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsY(level, "send")); i++) { - comm->sendDataGPU( + comm.sendDataGPU( para->getParH(level)->sendProcessNeighborF3Y[i].g[0], para->getParH(level)->sendProcessNeighborF3Y[i].numberOfGs, para->getParH(level)->sendProcessNeighborF3Y[i].rankNeighbor); @@ -1441,13 +1441,13 @@ void exchangeCollDataF3YGPU(Parameter* para, vf::gpu::Communicator& comm, CudaMe //Wait for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsY(level, "send")); i++) { - comm->waitGPU(i); + comm.waitGPU(i); } /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// //reset the request array if (0 < (unsigned int)(para->getNumberOfProcessNeighborsY(level, "send"))) { - comm->resetRequest(); + comm.resetRequest(); } 
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// //copy Host to Device @@ -1501,7 +1501,7 @@ void exchangeCollDataF3ZGPU(Parameter* para, vf::gpu::Communicator& comm, CudaMe //start non blocking MPI receive for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsZ(level, "send")); i++) { - comm->nbRecvDataGPU( + comm.nbRecvDataGPU( para->getParH(level)->recvProcessNeighborF3Z[i].g[0], para->getParH(level)->recvProcessNeighborF3Z[i].numberOfGs, para->getParH(level)->recvProcessNeighborF3Z[i].rankNeighbor); @@ -1510,7 +1510,7 @@ void exchangeCollDataF3ZGPU(Parameter* para, vf::gpu::Communicator& comm, CudaMe //start blocking MPI send for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsZ(level, "send")); i++) { - comm->sendDataGPU( + comm.sendDataGPU( para->getParH(level)->sendProcessNeighborF3Z[i].g[0], para->getParH(level)->sendProcessNeighborF3Z[i].numberOfGs, para->getParH(level)->sendProcessNeighborF3Z[i].rankNeighbor); @@ -1519,13 +1519,13 @@ void exchangeCollDataF3ZGPU(Parameter* para, vf::gpu::Communicator& comm, CudaMe //Wait for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsZ(level, "send")); i++) { - comm->waitGPU(i); + comm.waitGPU(i); } /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// //reset the request array if (0 < (unsigned int)(para->getNumberOfProcessNeighborsZ(level, "send"))) { - comm->resetRequest(); + comm.resetRequest(); } /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// //copy Host to Device diff --git a/src/gpu/VirtualFluids_GPU/Communication/ExchangeData27.h b/src/gpu/VirtualFluids_GPU/Communication/ExchangeData27.h index 381938fedf9c0ceb79286df1c91ba7d312e312b9..c6116ea37e6a6b17c7c3ded73d3e8478f07c41da 100644 --- a/src/gpu/VirtualFluids_GPU/Communication/ExchangeData27.h +++ b/src/gpu/VirtualFluids_GPU/Communication/ExchangeData27.h @@ -14,9 +14,9 @@ ////////////////////////////////////////////////////////////////////////// // 1D domain decomposition -extern "C" void exchangePreCollDataGPU27(Parameter *para, vf::gpu::Communicator *comm, CudaMemoryManager *cudaManager, +extern "C" void exchangePreCollDataGPU27(Parameter *para, vf::gpu::Communicator &comm, CudaMemoryManager *cudaManager, int level); -extern "C" void exchangePostCollDataGPU27(Parameter *para, vf::gpu::Communicator *comm, CudaMemoryManager *cudaManager, +extern "C" void exchangePostCollDataGPU27(Parameter *para, vf::gpu::Communicator &comm, CudaMemoryManager *cudaManager, int level); ////////////////////////////////////////////////////////////////////////// // 3D domain decomposition @@ -62,7 +62,7 @@ extern "C" void prepareExchangeCollDataXGPU27AfterFtoC(Parameter *para, int leve //! \param streamIndex is the index of a CUDA Stream, which is needed for communication hiding //! \param sendProcessNeighborDev, recvProcessNeighborDev, sendProcessNeighborHost, recvProcessNeighborHost are pointers //! 
to the send and receive arrays, both on the device and the host -extern "C" void exchangeCollDataXGPU27(Parameter *para, vf::gpu::Communicator *comm, CudaMemoryManager *cudaManager, +extern "C" void exchangeCollDataXGPU27(Parameter *para, vf::gpu::Communicator &comm, CudaMemoryManager *cudaManager, int level, int streamIndex, std::vector<ProcessNeighbor27> *sendProcessNeighborDev, std::vector<ProcessNeighbor27> *recvProcessNeighborDev, @@ -70,13 +70,13 @@ extern "C" void exchangeCollDataXGPU27(Parameter *para, vf::gpu::Communicator *c std::vector<ProcessNeighbor27> *recvProcessNeighborHost); //! \brief calls exchangeCollDataXGPU27() for exchanging all nodes //! \details used in the communication after collision step -extern "C" void exchangeCollDataXGPU27AllNodes(Parameter *para, vf::gpu::Communicator *comm, +extern "C" void exchangeCollDataXGPU27AllNodes(Parameter *para, vf::gpu::Communicator &comm, CudaMemoryManager *cudaManager, int level, int streamIndex); //! \brief calls exchangeCollDataXGPU27() for exchanging the nodes, which are part of the communication between the two //! interpolation processes on refined grids \details Only exchange nodes which are part of the interpolation process on //! refined grids. This function is used in the exchange which takes place after the interpolation fine to coarse and //! before the interpolation coarse to fine. \ref see master thesis of Anna Wellmann -extern "C" void exchangeCollDataXGPU27AfterFtoC(Parameter *para, vf::gpu::Communicator *comm, +extern "C" void exchangeCollDataXGPU27AfterFtoC(Parameter *para, vf::gpu::Communicator &comm, CudaMemoryManager *cudaManager, int level, int streamIndex); //! \brief distribute the receive nodes (x direction) from the buffer on the gpu //! \details needed to exchange all nodes, used in the communication after collision step @@ -93,15 +93,15 @@ extern "C" void scatterNodesFromRecvBufferXGPU27AfterFtoC(Parameter *para, int l extern "C" void prepareExchangeCollDataYGPU27AllNodes(Parameter *para, int level, int streamIndex); extern "C" void prepareExchangeCollDataYGPU27AfterFtoC(Parameter *para, int level, int streamIndex); -extern "C" void exchangeCollDataYGPU27(Parameter *para, vf::gpu::Communicator *comm, CudaMemoryManager *cudaManager, +extern "C" void exchangeCollDataYGPU27(Parameter *para, vf::gpu::Communicator &comm, CudaMemoryManager *cudaManager, int level, int streamIndex, std::vector<ProcessNeighbor27> *sendProcessNeighborDev, std::vector<ProcessNeighbor27> *recvProcessNeighborDev, std::vector<ProcessNeighbor27> *sendProcessNeighborHost, std::vector<ProcessNeighbor27> *recvProcessNeighborHos); -extern "C" void exchangeCollDataYGPU27AllNodes(Parameter *para, vf::gpu::Communicator *comm, +extern "C" void exchangeCollDataYGPU27AllNodes(Parameter *para, vf::gpu::Communicator &comm, CudaMemoryManager *cudaManager, int level, int streamIndex); -extern "C" void exchangeCollDataYGPU27AfterFtoC(Parameter *para, vf::gpu::Communicator *comm, +extern "C" void exchangeCollDataYGPU27AfterFtoC(Parameter *para, vf::gpu::Communicator &comm, CudaMemoryManager *cudaManager, int level, int streamIndex); extern "C" void scatterNodesFromRecvBufferYGPU27AllNodes(Parameter *para, int level, int streamIndex); extern "C" void scatterNodesFromRecvBufferYGPU27AfterFtoC(Parameter *para, int level, int streamIndex); @@ -110,15 +110,15 @@ extern "C" void scatterNodesFromRecvBufferYGPU27AfterFtoC(Parameter *para, int l extern "C" void prepareExchangeCollDataZGPU27AllNodes(Parameter *para, int level, int streamIndex); extern "C" 
void prepareExchangeCollDataZGPU27AfterFtoC(Parameter *para, int level, int streamIndex); -extern "C" void exchangeCollDataZGPU27(Parameter *para, vf::gpu::Communicator *comm, CudaMemoryManager *cudaManager, +extern "C" void exchangeCollDataZGPU27(Parameter *para, vf::gpu::Communicator &comm, CudaMemoryManager *cudaManager, int level, int streamIndex, std::vector<ProcessNeighbor27> *sendProcessNeighborDev, std::vector<ProcessNeighbor27> *recvProcessNeighborDev, std::vector<ProcessNeighbor27> *sendProcessNeighborHost, std::vector<ProcessNeighbor27> *recvProcessNeighborHost); -extern "C" void exchangeCollDataZGPU27AllNodes(Parameter *para, vf::gpu::Communicator *comm, +extern "C" void exchangeCollDataZGPU27AllNodes(Parameter *para, vf::gpu::Communicator &comm, CudaMemoryManager *cudaManager, int level, int streamIndex); -extern "C" void exchangeCollDataZGPU27AfterFtoC(Parameter *para, vf::gpu::Communicator *comm, +extern "C" void exchangeCollDataZGPU27AfterFtoC(Parameter *para, vf::gpu::Communicator &comm, CudaMemoryManager *cudaManager, int level, int streamIndex); extern "C" void scatterNodesFromRecvBufferZGPU27AllNodes(Parameter *para, int level, int streamIndex); @@ -126,28 +126,28 @@ extern "C" void scatterNodesFromRecvBufferZGPU27AfterFtoC(Parameter *para, int l ////////////////////////////////////////////////////////////////////////// // 3D domain decomposition convection diffusion -extern "C" void exchangePreCollDataADXGPU27(Parameter *para, vf::gpu::Communicator *comm, +extern "C" void exchangePreCollDataADXGPU27(Parameter *para, vf::gpu::Communicator &comm, CudaMemoryManager *cudaManager, int level); -extern "C" void exchangePreCollDataADYGPU27(Parameter *para, vf::gpu::Communicator *comm, +extern "C" void exchangePreCollDataADYGPU27(Parameter *para, vf::gpu::Communicator &comm, CudaMemoryManager *cudaManager, int level); -extern "C" void exchangePreCollDataADZGPU27(Parameter *para, vf::gpu::Communicator *comm, +extern "C" void exchangePreCollDataADZGPU27(Parameter *para, vf::gpu::Communicator &comm, CudaMemoryManager *cudaManager, int level); -extern "C" void exchangePostCollDataADXGPU27(Parameter *para, vf::gpu::Communicator *comm, +extern "C" void exchangePostCollDataADXGPU27(Parameter *para, vf::gpu::Communicator &comm, CudaMemoryManager *cudaManager, int level); -extern "C" void exchangePostCollDataADYGPU27(Parameter *para, vf::gpu::Communicator *comm, +extern "C" void exchangePostCollDataADYGPU27(Parameter *para, vf::gpu::Communicator &comm, CudaMemoryManager *cudaManager, int level); -extern "C" void exchangePostCollDataADZGPU27(Parameter *para, vf::gpu::Communicator *comm, +extern "C" void exchangePostCollDataADZGPU27(Parameter *para, vf::gpu::Communicator &comm, CudaMemoryManager *cudaManager, int level); ////////////////////////////////////////////////////////////////////////// // 3D domain decomposition F3 - K18/K20 -extern "C" void exchangeCollDataF3XGPU(Parameter *para, vf::gpu::Communicator *comm, CudaMemoryManager *cudaManager, +extern "C" void exchangeCollDataF3XGPU(Parameter *para, vf::gpu::Communicator &comm, CudaMemoryManager *cudaManager, int level); -extern "C" void exchangeCollDataF3YGPU(Parameter *para, vf::gpu::Communicator *comm, CudaMemoryManager *cudaManager, +extern "C" void exchangeCollDataF3YGPU(Parameter *para, vf::gpu::Communicator &comm, CudaMemoryManager *cudaManager, int level); -extern "C" void exchangeCollDataF3ZGPU(Parameter *para, vf::gpu::Communicator *comm, CudaMemoryManager *cudaManager, +extern "C" void exchangeCollDataF3ZGPU(Parameter 
*para, vf::gpu::Communicator &comm, CudaMemoryManager *cudaManager, int level); ////////////////////////////////////////////////////////////////////////// -extern "C" void barrierGPU(vf::gpu::Communicator *comm); +extern "C" void barrierGPU(vf::gpu::Communicator &comm); ////////////////////////////////////////////////////////////////////////// #endif diff --git a/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderGenerator/IndexRearrangementForStreams.cpp b/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderGenerator/IndexRearrangementForStreams.cpp index 1bdf32f281dc4f2d22cf6bf3b3ff43ca62cd592c..c11602aa7f7f414f8f315de6ccb76f72de30613b 100644 --- a/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderGenerator/IndexRearrangementForStreams.cpp +++ b/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderGenerator/IndexRearrangementForStreams.cpp @@ -34,8 +34,8 @@ void IndexRearrangementForStreams::initCommunicationArraysForCommAfterFinetoCoar recvIndicesForCommAfterFtoCPositions.resize( (size_t)para->getParH(level)->sendProcessNeighborsAfterFtoCX[indexOfProcessNeighbor].numberOfNodes * 2); // give vector an arbitraty size (larger than needed) // TODO: Find a better way - auto comm = vf::gpu::Communicator::getInstanz(); - comm->exchangeIndices(recvIndicesForCommAfterFtoCPositions.data(), (int)recvIndicesForCommAfterFtoCPositions.size(), + vf::gpu::Communicator& comm = vf::gpu::Communicator::getInstance(); + comm.exchangeIndices(recvIndicesForCommAfterFtoCPositions.data(), (int)recvIndicesForCommAfterFtoCPositions.size(), para->getParH(level)->recvProcessNeighborX[indexOfProcessNeighbor].rankNeighbor, sendIndicesForCommAfterFtoCPositions.data(), (int)sendIndicesForCommAfterFtoCPositions.size(), para->getParH(level)->sendProcessNeighborX[indexOfProcessNeighbor].rankNeighbor); @@ -76,8 +76,8 @@ void IndexRearrangementForStreams::initCommunicationArraysForCommAfterFinetoCoar recvIndicesForCommAfterFtoCPositions.resize( (size_t)para->getParH(level)->sendProcessNeighborsAfterFtoCY[indexOfProcessNeighbor].numberOfNodes * 2); // give vector an arbitraty size (larger than needed) // TODO: Find a better way - auto comm = vf::gpu::Communicator::getInstanz(); - comm->exchangeIndices(recvIndicesForCommAfterFtoCPositions.data(), (int)recvIndicesForCommAfterFtoCPositions.size(), + vf::gpu::Communicator& comm = vf::gpu::Communicator::getInstance(); + comm.exchangeIndices(recvIndicesForCommAfterFtoCPositions.data(), (int)recvIndicesForCommAfterFtoCPositions.size(), para->getParH(level)->recvProcessNeighborY[indexOfProcessNeighbor].rankNeighbor, sendIndicesForCommAfterFtoCPositions.data(), (int)sendIndicesForCommAfterFtoCPositions.size(), para->getParH(level)->sendProcessNeighborY[indexOfProcessNeighbor].rankNeighbor); @@ -119,8 +119,8 @@ void IndexRearrangementForStreams::initCommunicationArraysForCommAfterFinetoCoar recvIndicesForCommAfterFtoCPositions.resize( (size_t)para->getParH(level)->sendProcessNeighborsAfterFtoCZ[indexOfProcessNeighbor].numberOfNodes * 2); // give vector an arbitraty size (larger than needed) // TODO: Find a better way - auto comm = vf::gpu::Communicator::getInstanz(); - comm->exchangeIndices(recvIndicesForCommAfterFtoCPositions.data(), (int)recvIndicesForCommAfterFtoCPositions.size(), + vf::gpu::Communicator& comm = vf::gpu::Communicator::getInstance(); + comm.exchangeIndices(recvIndicesForCommAfterFtoCPositions.data(), (int)recvIndicesForCommAfterFtoCPositions.size(), para->getParH(level)->recvProcessNeighborZ[indexOfProcessNeighbor].rankNeighbor, 
sendIndicesForCommAfterFtoCPositions.data(), (int)sendIndicesForCommAfterFtoCPositions.size(), para->getParH(level)->sendProcessNeighborZ[indexOfProcessNeighbor].rankNeighbor); @@ -304,7 +304,7 @@ void IndexRearrangementForStreams::reorderSendIndicesForCommAfterFtoC( sendIndices[i + numberOfSendNodesAfterFtoC] = sendIndicesOther[i]; *logging::out << logging::Logger::INFO_INTERMEDIATE << "... Process " - << " " << vf::gpu::Communicator::getInstanz()->getPID() + << " " << vf::gpu::Communicator::getInstance().getPID() << " numberOfSendNodesAfterFtoC: " << numberOfSendNodesAfterFtoC << "\n "; if (numberOfSendNodesAfterFtoC + sendIndicesOther.size() != numberOfSendIndices) { @@ -454,7 +454,7 @@ void IndexRearrangementForStreams::reorderRecvIndicesForCommAfterFtoC( recvIndices[i + numberOfRecvNodesAfterFtoC] = recvIndicesOther[i]; *logging::out << logging::Logger::INFO_INTERMEDIATE << "... Process " - << " " << vf::gpu::Communicator::getInstanz()->getPID() + << " " << vf::gpu::Communicator::getInstance().getPID() << " numberOfRecvNodesAfterFtoC: " << numberOfRecvNodesAfterFtoC << "\n "; if (numberOfRecvNodesAfterFtoC + recvIndicesOther.size() != numberOfRecvIndices) { diff --git a/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderGenerator/IndexRearrangementForStreamsTest.cpp b/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderGenerator/IndexRearrangementForStreamsTest.cpp index b96f00f35ee25d4ac695b4beeeee691ad40ccc15..f07bc6763be9d06743fa1c8db93973cfde688f87 100644 --- a/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderGenerator/IndexRearrangementForStreamsTest.cpp +++ b/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderGenerator/IndexRearrangementForStreamsTest.cpp @@ -32,7 +32,7 @@ private: uint numberOfSendIndices; public: - LevelGridBuilderDouble(SPtr<Grid> grid) : LevelGridBuilder(Device(), ""), grid(grid){}; + LevelGridBuilderDouble(SPtr<Grid> grid) : LevelGridBuilder(), grid(grid){}; SPtr<Grid> getGrid(uint level) override { return grid; }; std::shared_ptr<Grid> getGrid(int level, int box) override { return grid; }; void setNumberOfSendIndices(uint numberOfSendIndices) { this->numberOfSendIndices = numberOfSendIndices; }; @@ -46,17 +46,17 @@ private: public: GridImpDouble(Object *object, real startX, real startY, real startZ, real endX, real endY, real endZ, real delta, - SPtr<GridStrategy> gridStrategy, Distribution d, uint level) - : GridImp(object, startX, startY, startZ, endX, endY, endZ, delta, gridStrategy, d, level) + Distribution d, uint level) + : GridImp(object, startX, startY, startZ, endX, endY, endZ, delta, d, level) { } static SPtr<GridImpDouble> makeShared(Object *object, real startX, real startY, real startZ, real endX, real endY, - real endZ, real delta, SPtr<GridStrategy> gridStrategy, Distribution d, + real endZ, real delta, Distribution d, uint level) { SPtr<GridImpDouble> grid( - new GridImpDouble(object, startX, startY, startZ, endX, endY, endZ, delta, gridStrategy, d, level)); + new GridImpDouble(object, startX, startY, startZ, endX, endY, endZ, delta, d, level)); return grid; } @@ -119,7 +119,7 @@ private: std::unique_ptr<IndexRearrangementForStreams> createTestSubjectCFBorderBulk() { SPtr<GridImpDouble> grid = - GridImpDouble::makeShared(nullptr, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, nullptr, Distribution(), 1); + GridImpDouble::makeShared(nullptr, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, Distribution(), 1); grid->setFluidNodeIndicesBorder(cf.fluidNodeIndicesBorder); std::shared_ptr<LevelGridBuilderDouble> builder = 
std::make_shared<LevelGridBuilderDouble>(grid); @@ -207,7 +207,7 @@ private: std::unique_ptr<IndexRearrangementForStreams> createTestSubjectFCBorderBulk() { SPtr<GridImpDouble> grid = - GridImpDouble::makeShared(nullptr, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, nullptr, Distribution(), 1); + GridImpDouble::makeShared(nullptr, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, Distribution(), 1); grid->setFluidNodeIndicesBorder(fc.fluidNodeIndicesBorder); std::shared_ptr<LevelGridBuilderDouble> builder = std::make_shared<LevelGridBuilderDouble>(grid); @@ -297,7 +297,7 @@ private: logging::Logger::addStream(&std::cout); MPI_Init(NULL, NULL); SPtr<GridImpDouble> grid = - GridImpDouble::makeShared(nullptr, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, nullptr, Distribution(), 1); + GridImpDouble::makeShared(nullptr, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, Distribution(), 1); std::shared_ptr<LevelGridBuilderDouble> builder = std::make_shared<LevelGridBuilderDouble>(grid); builder->setNumberOfSendIndices((uint)si.sendIndices.size()); diff --git a/src/gpu/VirtualFluids_GPU/LBM/Simulation.cpp b/src/gpu/VirtualFluids_GPU/LBM/Simulation.cpp index 4f8c297c01062efcdee3c6857b1bb71cd11301af..706f3da00fe2f98bedd1975951cc8d0a8f189a7d 100644 --- a/src/gpu/VirtualFluids_GPU/LBM/Simulation.cpp +++ b/src/gpu/VirtualFluids_GPU/LBM/Simulation.cpp @@ -392,7 +392,7 @@ void Simulation::init(SPtr<Parameter> para, SPtr<GridProvider> gridProvider, std ////////////////////////////////////////////////////////////////////////// // Init UpdateGrid ////////////////////////////////////////////////////////////////////////// - this->updateGrid27 = std::make_unique<UpdateGrid27>(para, comm, cudaManager, pm, kernels); + this->updateGrid27 = std::make_unique<UpdateGrid27>(para, communicator, cudaManager, pm, kernels); ////////////////////////////////////////////////////////////////////////// //Print Init @@ -405,7 +405,7 @@ void Simulation::init(SPtr<Parameter> para, SPtr<GridProvider> gridProvider, std ////////////////////////////////////////////////////////////////////////// output << "used Device Memory: " << cudaManager->getMemsizeGPU() / 1000000.0 << " MB\n"; - // std::cout << "Process " << comm->getPID() <<": used device memory" << cudaManager->getMemsizeGPU() / 1000000.0 << " MB\n" << std::endl; + // std::cout << "Process " << communicator.getPID() <<": used device memory" << cudaManager->getMemsizeGPU() / 1000000.0 << " MB\n" << std::endl; ////////////////////////////////////////////////////////////////////////// //InterfaceDebugWriter::writeInterfaceLinesDebugCF(para.get()); @@ -487,7 +487,7 @@ void Simulation::run() //////////////////////////////////////////////////////////////////////////////// if (this->kineticEnergyAnalyzer || this->enstrophyAnalyzer) { prepareExchangeMultiGPU(para.get(), 0, -1); - exchangeMultiGPU(para.get(), comm, cudaManager.get(), 0, -1); + exchangeMultiGPU(para.get(), communicator, cudaManager.get(), 0, -1); } if( this->kineticEnergyAnalyzer ) this->kineticEnergyAnalyzer->run(t); @@ -717,7 +717,7 @@ void Simulation::run() ////////////////////////////////////////////////////////////////////////// //exchange data for valid post process prepareExchangeMultiGPU(para.get(), lev, -1); - exchangeMultiGPU(para.get(), comm, cudaManager.get(), lev, -1); + exchangeMultiGPU(para.get(), communicator, cudaManager.get(), lev, -1); ////////////////////////////////////////////////////////////////////////// //if (para->getD3Qxx()==19) //{ @@ -997,19 +997,7 @@ void Simulation::run() } } - 
////////////////////////////////////////////////////////////////////////// - // When using multiple GPUs, get Nups of all processes - if (para->getMaxDev() > 1) { - std::vector<double> nups = comm->gatherNUPS(fnups); - if (comm->getPID() == 0) { - double sum = 0; - for (uint pid = 0; pid < nups.size(); pid++) { - output << "Process " << pid << ": Nups in Mio: " << nups[pid] << "\n"; - sum += nups[pid]; - } - output << "Sum of all processes: Nups in Mio: " << sum << "\n"; - } - } + ///////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////// //printDragLift(para); @@ -1337,4 +1325,5 @@ void Simulation::free() probe->free(para.get(), cudaManager.get()); } ////////////////////////////////////////////////////////////////////////// -} + } +} \ No newline at end of file diff --git a/src/gpu/VirtualFluids_GPU/Output/EdgeNodeDebugWriter.hpp b/src/gpu/VirtualFluids_GPU/Output/EdgeNodeDebugWriter.hpp index 4b2d2f275bd127f0065f135acb94b738260c4668..97b0b7421a1eb457096d191fe555122f23859ae3 100644 --- a/src/gpu/VirtualFluids_GPU/Output/EdgeNodeDebugWriter.hpp +++ b/src/gpu/VirtualFluids_GPU/Output/EdgeNodeDebugWriter.hpp @@ -54,7 +54,7 @@ void writeEdgeNodesXZ_Send(SPtr<Parameter> para) nodeCount++; } std::string filenameVec = para->getFName() + "_writeEdgeNodesXZ_Send_PID_" + - std::to_string(vf::gpu::Communicator::getInstanz()->getPID()) + "_" + + std::to_string(vf::gpu::Communicator::getInstance().getPID()) + "_" + StringUtil::toString<int>(level); WbWriterVtkXmlBinary::getInstance()->writeNodesWithNodeData(filenameVec, nodesVec, datanames, nodedata); @@ -90,7 +90,7 @@ void writeEdgeNodesXZ_Recv(SPtr<Parameter> para) nodeCount++; } std::string filenameVec = para->getFName() + "_writeEdgeNodesXZ_Recv_PID_" + - std::to_string(vf::gpu::Communicator::getInstanz()->getPID()) + "_" + + std::to_string(vf::gpu::Communicator::getInstance().getPID()) + "_" + StringUtil::toString<int>(level); WbWriterVtkXmlBinary::getInstance()->writeNodesWithNodeData(filenameVec, nodesVec, datanames, nodedata); diff --git a/src/gpu/VirtualFluids_GPU/Output/InterfaceDebugWriter.hpp b/src/gpu/VirtualFluids_GPU/Output/InterfaceDebugWriter.hpp index 26fc6851842ba20b73feb8efae29768fa1a47e9e..dd07e4db5e970de5dda9fc5a7dbf395f1321f04f 100644 --- a/src/gpu/VirtualFluids_GPU/Output/InterfaceDebugWriter.hpp +++ b/src/gpu/VirtualFluids_GPU/Output/InterfaceDebugWriter.hpp @@ -656,7 +656,7 @@ void writeInterfaceFCC_Send(Parameter *para) nodeCount++; } std::string filenameVec = para->getFName() + "_writeInterfaceFCC_Send_PID_" + - std::to_string(vf::gpu::Communicator::getInstanz()->getPID()) + "_" + + std::to_string(vf::gpu::Communicator::getInstance().getPID()) + "_" + StringUtil::toString<int>(level); WbWriterVtkXmlBinary::getInstance()->writeNodesWithNodeData(filenameVec, nodesVec, datanames, nodedata); @@ -709,7 +709,7 @@ void writeInterfaceCFC_Recv(Parameter *para) nodeCount++; } std::string filenameVec = para->getFName() + "_writeInterfaceCFC_Recv_PID_" + - std::to_string(vf::gpu::Communicator::getInstanz()->getPID()) + "_" + + std::to_string(vf::gpu::Communicator::getInstance().getPID()) + "_" + StringUtil::toString<int>(level); WbWriterVtkXmlBinary::getInstance()->writeNodesWithNodeData(filenameVec, nodesVec, datanames, nodedata); @@ -814,7 +814,7 @@ void writeSendNodesStream(Parameter *para) } } std::string filenameVec = para->getFName() + "_writeSendNodesStreams_PID_" + - std::to_string(vf::gpu::Communicator::getInstanz()->getPID()) + "_" 
+ + std::to_string(vf::gpu::Communicator::getInstance().getPID()) + "_" + StringUtil::toString<int>(level); WbWriterVtkXmlBinary::getInstance()->writeNodesWithNodeData(filenameVec, nodesVec, datanames, nodedata); @@ -900,7 +900,7 @@ void writeRecvNodesStream(Parameter *para) // Recv are nodes ghost nodes and therefore they can't be iCellCFCs std::string filenameVec = para->getFName() + "_writeRecvNodesStreams_PID_" + - std::to_string(vf::gpu::Communicator::getInstanz()->getPID()) + "_" + + std::to_string(vf::gpu::Communicator::getInstance().getPID()) + "_" + StringUtil::toString<int>(level); WbWriterVtkXmlBinary::getInstance()->writeNodesWithNodeData(filenameVec, nodesVec, datanames, nodedata); diff --git a/src/gpu/VirtualFluids_GPU/Output/Timer.cpp b/src/gpu/VirtualFluids_GPU/Output/Timer.cpp index 823364a22eca41517816c1fdb61dfdc96ef1d961..b08a4ea11e6227d460ef5913695ebebf2474a02b 100644 --- a/src/gpu/VirtualFluids_GPU/Output/Timer.cpp +++ b/src/gpu/VirtualFluids_GPU/Output/Timer.cpp @@ -3,6 +3,7 @@ #include <cuda_runtime.h> #include "UbScheduler.h" #include "Timer.h" +#include "VirtualFluids_GPU/Communication/Communicator.h" void Timer::initTimer() @@ -48,4 +49,18 @@ void Timer::outputPerformance(uint t, Parameter* para) } VF_LOG_INFO(" --- {} --- {}/{} \t {} \t {}", this->name, this->elapsedTime, this->totalElapsedTime, fnups, bandwidth ); + + // When using multiple GPUs, get Nups of all processes + if (para->getMaxDev() > 1) { + vf::gpu::Communicator& comm=vf::gpu::Communicator::getInstance(); + std::vector<double> nups = comm.gatherNUPS(fnups); + if (comm.getPID() == 0) { + double sum = 0; + for (uint pid = 0; pid < nups.size(); pid++) { + VF_LOG_INFO("Process {}: \t NUPS in Mio: {}", pid, nups[pid]); + sum += nups[pid]; + } + VF_LOG_INFO("Sum of all processes: Nups in Mio: {}", sum); + } + } } \ No newline at end of file diff --git a/src/gpu/VirtualFluids_GPU/Parameter/Parameter.cpp b/src/gpu/VirtualFluids_GPU/Parameter/Parameter.cpp index d38f533b0c6e99220f6f787e4318dbf00523e2b2..4f8e9ef3d5686937d257538047c97c350ebff16a 100644 --- a/src/gpu/VirtualFluids_GPU/Parameter/Parameter.cpp +++ b/src/gpu/VirtualFluids_GPU/Parameter/Parameter.cpp @@ -604,10 +604,6 @@ void Parameter::setOutputCount(unsigned int outputCount) { this->outputCount = outputCount; } -void Parameter::setPhi(real inPhi) { Phi = inPhi; } -void Parameter::setAngularVelocity(real inAngVel) { angularVelocity = inAngVel; } -void Parameter::setStepEnsight(unsigned int step) { this->stepEnsight = step; } -void Parameter::setOutputCount(unsigned int outputCount) { this->outputCount = outputCount; } void Parameter::setlimitOfNodesForVTK(unsigned int limitOfNodesForVTK) { this->limitOfNodesForVTK = limitOfNodesForVTK; @@ -667,6 +663,18 @@ void Parameter::setViscosityRatio(real ViscosityRatio) { ic.vis_ratio = Viscosit void Parameter::setVelocityRatio(real VelocityRatio) { ic.u0_ratio = VelocityRatio; } void Parameter::setDensityRatio(real DensityRatio) { ic.delta_rho = DensityRatio; } void Parameter::setPressRatio(real PressRatio) { ic.delta_press = PressRatio; } +real Parameter::getTimeRatio() +{ + return this->getViscosityRatio()*pow(this->getVelocityRatio(),-2); +} +real Parameter::getForceRatio() +{ + return this->getDensityRatio()*pow(this->getViscosityRatio(),2); +} +real Parameter::getLengthRatio() +{ + return this->getViscosityRatio()/this->getVelocityRatio(); +} void Parameter::setRealX(real RealX) { ic.RealX = RealX; } void Parameter::setRealY(real RealY) { ic.RealY = RealY; } void Parameter::setPressInID(unsigned int 
PressInID) { ic.PressInID = PressInID; } @@ -691,7 +699,6 @@ void Parameter::setIsCp(bool isCp) { ic.isCp = isCp; } void Parameter::setConcFile(bool concFile) { ic.isConc = concFile; } void Parameter::setStreetVelocityFile(bool streetVelocityFile) { ic.streetVelocityFile = streetVelocityFile; } void Parameter::setUseMeasurePoints(bool useMeasurePoints) { ic.isMeasurePoints = useMeasurePoints; } -void Parameter::setUseWale(bool useWale) { ic.isWale = useWale; } void Parameter::setUseInitNeq(bool useInitNeq) { ic.isInitNeq = useInitNeq; } void Parameter::setSimulatePorousMedia(bool simulatePorousMedia) { ic.simulatePorousMedia = simulatePorousMedia; } void Parameter::setUseTurbulentViscosity(bool useTurbulentViscosity) @@ -1048,7 +1055,23 @@ void Parameter::setMultiKernelOn(bool isOn) { this->multiKernelOn = isOn; } void Parameter::setMultiKernelLevel(std::vector<int> kernelLevel) { this->multiKernelLevel = kernelLevel; } void Parameter::setMultiKernel(std::vector<std::string> kernel) { this->multiKernel = kernel; } void Parameter::setADKernel(std::string adKernel) { this->adKernel = adKernel; } + + + + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +//add-methods +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +void Parameter::addActuator(SPtr<PreCollisionInteractor> actuator) +{ + actuators.push_back(actuator); +} +void Parameter::addProbe(SPtr<PreCollisionInteractor> probe) +{ + probes.push_back(probe); +} + + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// // get-methods @@ -1259,9 +1282,33 @@ bool Parameter::getConcFile() { return ic.isConc; } bool Parameter::isStreetVelocityFile() { return ic.streetVelocityFile; } bool Parameter::getUseMeasurePoints() { return ic.isMeasurePoints; } bool Parameter::getUseWale() { return ic.isWale; } +bool Parameter::getUseAMD() +{ + return ic.isAMD; +}bool Parameter::getUseTurbulentViscosity() +{ + return ic.isTurbulentViscosity; +} +real Parameter::getSGSConstant() +{ + return ic.SGSConstant; +} +bool Parameter::getHasWallModelMonitor() +{ + return ic.hasWallModelMonitor; +} +std::vector<SPtr<PreCollisionInteractor>> Parameter::getActuators() +{ + return actuators; +} +std::vector<SPtr<PreCollisionInteractor>> Parameter::getProbes() +{ + return probes; +} bool Parameter::getUseInitNeq() { return ic.isInitNeq; } bool Parameter::getSimulatePorousMedia() { return ic.simulatePorousMedia; } + bool Parameter::getIsF3() { return this->isF3; } bool Parameter::getIsBodyForce() { return this->isBodyForce; }
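
The call sites above consistently replace vf::gpu::Communicator::getInstanz()-> with vf::gpu::Communicator::getInstance(). , i.e. the singleton accessor is renamed and now hands out a reference instead of a pointer. Below is a minimal sketch of that accessor shape, assuming a Meyers-style singleton; getPID and gatherNUPS are names taken from the diff, but their bodies and everything else in this snippet are assumptions, not the project's actual implementation (the real Communicator wraps MPI).

#include <vector>

namespace vf::gpu
{
class Communicator
{
public:
    // Reference-returning accessor: the instance is created on first use and
    // callers can never receive a null communicator.
    static Communicator& getInstance()
    {
        static Communicator instance;
        return instance;
    }
    Communicator(const Communicator&) = delete;            // the singleton is not copyable
    Communicator& operator=(const Communicator&) = delete;

    int getPID() const { return pid; }                     // MPI rank in the real class

    // Single-process stub; the real method gathers one value per rank via MPI.
    std::vector<double> gatherNUPS(double localNups) const { return { localNups }; }

private:
    Communicator() = default;
    int pid = 0;                                           // placeholder; set from MPI in reality
};
} // namespace vf::gpu

Returning a reference makes the "never null" guarantee explicit at every call site, which is why the dependent functions in this patch can drop their pointer parameters as well.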
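
The initCommunicationArraysForCommAfterFinetoCoarse hunks resize the receive vector to a deliberately oversized bound before calling comm.exchangeIndices, because the number of incoming indices is not known until the exchange has happened (the TODO comment in the code acknowledges this). A reduced sketch of that pattern follows; the argument order mirrors the calls in the diff, while IndexExchangerStub, exchangeAfterFtoCPositions and the local uint alias are illustrative stand-ins, not code from the repository.

#include <algorithm>
#include <cstdint>
#include <vector>

using uint = std::uint32_t;   // stand-in for the project's own typedef

// Stand-in for vf::gpu::Communicator::exchangeIndices; the real call is an MPI
// send/receive pair. Here it only copies as many indices as both sides allow.
struct IndexExchangerStub
{
    int exchangeIndices(uint* recvBuffer, int recvCount, int /*recvRank*/,
                        const uint* sendBuffer, int sendCount, int /*sendRank*/)
    {
        const int received = std::min(recvCount, sendCount); // pretend the neighbor echoes our indices
        std::copy(sendBuffer, sendBuffer + received, recvBuffer);
        return received;
    }
};

// Pattern from initCommunicationArraysForCommAfterFinetoCoarseX/Y/Z: oversize the
// receive buffer (here 2x the known send-side node count), exchange, trim later.
std::vector<uint> exchangeAfterFtoCPositions(IndexExchangerStub& comm,
                                             const std::vector<uint>& sendPositions,
                                             std::size_t numberOfNodesAtNeighbor,
                                             int recvRank, int sendRank)
{
    std::vector<uint> recvPositions(numberOfNodesAtNeighbor * 2); // arbitrary size, larger than needed
    comm.exchangeIndices(recvPositions.data(), static_cast<int>(recvPositions.size()), recvRank,
                         sendPositions.data(), static_cast<int>(sendPositions.size()), sendRank);
    return recvPositions;
}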
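
In IndexRearrangementForStreamsTest.cpp the helper classes GridImpDouble and LevelGridBuilderDouble lose their GridStrategy/Device constructor arguments, so the test doubles can now be built directly from a handful of geometric parameters. The following is a minimal sketch of that test-double idea under the assumption of a base class with a protected constructor; GridSketch and GridSketchDouble are invented names, and the real GridImp/LevelGridBuilder interfaces are far larger than shown.

#include <memory>

// Reduced stand-in for GridImp: construction is protected, so production code
// goes through a factory while tests can still derive a lightweight double.
class GridSketch
{
protected:
    GridSketch(double delta, unsigned level) : delta(delta), level(level) {}

public:
    virtual ~GridSketch() = default;
    double getDelta() const { return delta; }
    unsigned getLevel() const { return level; }

private:
    double delta;
    unsigned level;
};

// Test double in the spirit of GridImpDouble: same construction data, plus
// whatever extra test-only state a unit test needs to inject.
class GridSketchDouble : public GridSketch
{
public:
    GridSketchDouble(double delta, unsigned level) : GridSketch(delta, level) {}

    static std::shared_ptr<GridSketchDouble> makeShared(double delta, unsigned level)
    {
        return std::make_shared<GridSketchDouble>(delta, level);
    }
};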
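
The Simulation.cpp hunks change the call sites to pass para.get(), cudaManager.get() and the communicator reference into UpdateGrid27 and exchangeMultiGPU. The convention this implies is that free functions which only use an object for the duration of the call borrow a raw pointer or reference, while the owner keeps the shared_ptr. The sketch below illustrates that borrowing pattern; all the *Sketch types and function names are placeholders, not project classes.

#include <memory>

// Reduced stand-ins; the real Parameter / CudaMemoryManager / Communicator are much larger.
struct ParameterSketch        { int maxLevel = 0; };
struct CudaMemoryManagerSketch { };
struct CommunicatorSketch     { int getPID() const { return 0; } };

// Mirrors exchangeMultiGPU(para.get(), communicator, cudaManager.get(), ...):
// the callee borrows, it does not share ownership.
void exchangeSketch(ParameterSketch* para, CommunicatorSketch& comm,
                    CudaMemoryManagerSketch* cudaManager, int level)
{
    (void)para; (void)comm; (void)cudaManager; (void)level; // exchange logic omitted
}

// The owner keeps the shared_ptrs and lends out the raw handles for the call.
void runSketch(const std::shared_ptr<ParameterSketch>& para,
               CommunicatorSketch& communicator,
               const std::shared_ptr<CudaMemoryManagerSketch>& cudaManager)
{
    exchangeSketch(para.get(), communicator, cudaManager.get(), /*level=*/0);
}

Keeping shared_ptr only where ownership is actually shared makes object lifetimes easier to reason about and avoids needless reference-count traffic inside the time-stepping loop.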
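
The multi-GPU NUPS report is removed from Simulation::run and re-added in Timer::outputPerformance, where it now obtains the communicator via Communicator::getInstance() and logs through VF_LOG_INFO. Below is a self-contained sketch of that gather-and-sum step with the MPI gather stubbed out and printf standing in for the logger; only the control flow (the "more than one device" guard, the gather, rank 0 printing per-process values and their sum) is taken from the diff.

#include <cstdio>
#include <vector>

// Stand-in for Communicator::gatherNUPS: rank 0 receives one NUPS value per
// process; other ranks would get an empty vector in the MPI version.
std::vector<double> gatherNupsStub(double localNups)
{
    return { localNups };   // single-process stub
}

// Mirrors the block added to Timer::outputPerformance: gather per-process NUPS,
// then let rank 0 print the individual values and their sum.
void reportNups(int pid, int numberOfDevices, double fnups)
{
    if (numberOfDevices <= 1)
        return;                                       // single-GPU run: nothing to aggregate

    const std::vector<double> nups = gatherNupsStub(fnups);
    if (pid == 0) {
        double sum = 0.0;
        for (std::size_t rank = 0; rank < nups.size(); ++rank) {
            std::printf("Process %zu: NUPS in Mio: %f\n", rank, nups[rank]);
            sum += nups[rank];
        }
        std::printf("Sum of all processes: NUPS in Mio: %f\n", sum);
    }
}

Moving the report into the timer keeps all performance output in one place and lets Simulation::run drop its direct dependency on the communicator for logging.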
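
Parameter.cpp gains three derived conversion factors. Given the existing viscosity, velocity and density ratios between physical and LB units, the new getters compute getTimeRatio = viscosityRatio / velocityRatio^2, getForceRatio = densityRatio * viscosityRatio^2 and getLengthRatio = viscosityRatio / velocityRatio, exactly the pow() expressions in the hunk, and the dimensions work out (length L, time T, force M*L/T^2). The standalone illustration below uses an invented UnitRatiosSketch struct and double in place of the project's real typedef.

#include <cmath>

// Reduced sketch of the new conversion getters in Parameter; member names shortened.
struct UnitRatiosSketch
{
    double viscosityRatio; // nu_physical  / nu_LB    (dimension L^2/T)
    double velocityRatio;  // u_physical   / u_LB     (dimension L/T)
    double densityRatio;   // rho_physical / rho_LB   (dimension M/L^3)

    // T = (L^2/T) / (L/T)^2 -> time conversion factor
    double timeRatio() const { return viscosityRatio * std::pow(velocityRatio, -2.0); }

    // M*L/T^2 = (M/L^3) * (L^2/T)^2 -> force conversion factor
    double forceRatio() const { return densityRatio * std::pow(viscosityRatio, 2.0); }

    // L = (L^2/T) / (L/T) -> length conversion factor
    double lengthRatio() const { return viscosityRatio / velocityRatio; }
};

With these three factors, time, force and length scales follow directly from the ratios the class already stores, so no additional reference scales need to be configured.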