From 7ed1cb2df2adc2797b010c3e3d173b7dcaf5c0b6 Mon Sep 17 00:00:00 2001 From: Anna Wellmann <a.wellmann@tu-braunschweig.de> Date: Wed, 6 Oct 2021 17:37:34 +0200 Subject: [PATCH] Use reduced communication after fine to coarse --- apps/gpu/LBM/MusselOyster/MusselOyster.cpp | 7 +-- .../Calculation/UpdateGrid27.cpp | 44 +++++++--------- .../Calculation/UpdateGrid27.h | 4 +- .../Communication/ExchangeData27.cpp | 50 +++++++++++++------ .../Communication/ExchangeData27.h | 7 ++- .../GPU/CudaMemoryManager.cpp | 14 +++--- .../VirtualFluids_GPU/GPU/CudaMemoryManager.h | 9 ++-- .../VirtualFluids_GPU/Parameter/Parameter.cpp | 12 +++++ 8 files changed, 92 insertions(+), 55 deletions(-) diff --git a/apps/gpu/LBM/MusselOyster/MusselOyster.cpp b/apps/gpu/LBM/MusselOyster/MusselOyster.cpp index d716ccf1d..ddccc671b 100644 --- a/apps/gpu/LBM/MusselOyster/MusselOyster.cpp +++ b/apps/gpu/LBM/MusselOyster/MusselOyster.cpp @@ -109,9 +109,10 @@ void multipleLevel(const std::string& configPath) //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// bool useGridGenerator = true; - bool useMultiGPU = true; - bool useStreams = true; - bool useLevels = true; + bool useMultiGPU = true; + bool useStreams = true; + bool useLevels = true; + para->useReducedCommunicationAfterFtoC = true; //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.cpp b/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.cpp index 96f5287fc..3f6c251d3 100644 --- a/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.cpp +++ b/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.cpp @@ -83,25 +83,17 @@ void updateGrid27(Parameter *para, 
vf::gpu::Communicator *comm, CudaMemoryManage para->getParD(level)->intFCBulk.ICellFCF, para->getParD(level)->intFCBulk.kFC, -1); - if (para->useReducedCommunicationAfterFtoC) { - prepareExchangeMultiGPU(para, level, -1); // TODO - exchangeMultiGPU(para, comm, cudaManager, level, -1); // TODO - } else { - prepareExchangeMultiGPU(para, level, -1); - exchangeMultiGPU(para, comm, cudaManager, level, -1); - } + prepareExchangeMultiGPUAfterFtoC(para, level, -1); + exchangeMultiGPUAfterFtoC(para, comm, cudaManager, level, -1); coarseToFine(para, level); - } else if (para->getNumprocs() > 1) { + } else { fineToCoarse(para, level); - prepareExchangeMultiGPU(para, level, -1); - exchangeMultiGPU(para, comm, cudaManager, level, -1); + prepareExchangeMultiGPUAfterFtoC(para, level, -1); + exchangeMultiGPUAfterFtoC(para, comm, cudaManager, level, -1); coarseToFine(para, level); - } else { - fineToCoarse(para, level); - coarseToFine(para, level); } } } @@ -214,25 +206,22 @@ void collisionAdvectionDiffusion(Parameter* para, int level) } } -void prepareExchangeMultiGPU(Parameter *para, int level, int streamIndex) +void prepareExchangeMultiGPU(Parameter *para, int level, int streamIndex, bool useReducedCommunicationAfterFtoC) { if (para->getNumprocs() > 1) { - prepareExchangeCollDataXGPU27(para, level, streamIndex); - prepareExchangeCollDataYGPU27(para, level, streamIndex); - prepareExchangeCollDataZGPU27(para, level, streamIndex); + prepareExchangeCollDataXGPU27(para, level, streamIndex); // TODO + prepareExchangeCollDataYGPU27(para, level, streamIndex, useReducedCommunicationAfterFtoC); + prepareExchangeCollDataZGPU27(para, level, streamIndex); // TODO } } -void prepareExchangeMultiGPUAfterFtoC(Parameter *para, int level, int streamIndex) { - if (para->getNumprocs() > 1) { - prepareExchangeCollDataXGPU27(para, level, streamIndex); - prepareExchangeCollDataYGPU27(para, level, streamIndex, true); - prepareExchangeCollDataZGPU27(para, level, streamIndex); - } +void 
prepareExchangeMultiGPUAfterFtoC(Parameter *para, int level, int streamIndex) +{ + prepareExchangeMultiGPU(para, level, streamIndex, para->useReducedCommunicationAfterFtoC); } void exchangeMultiGPU(Parameter *para, vf::gpu::Communicator *comm, CudaMemoryManager *cudaManager, int level, - int streamIndex) + int streamIndex, bool useReducedCommunicationAfterFtoC) { if (para->getNumprocs() > 1) { @@ -241,7 +230,7 @@ void exchangeMultiGPU(Parameter *para, vf::gpu::Communicator *comm, CudaMemoryMa ////////////////////////////////////////////////////////////////////////// //3D domain decomposition exchangeCollDataXGPU27(para, comm, cudaManager, level, streamIndex); - exchangeCollDataYGPU27(para, comm, cudaManager, level, streamIndex); + exchangeCollDataYGPU27(para, comm, cudaManager, level, streamIndex, useReducedCommunicationAfterFtoC); exchangeCollDataZGPU27(para, comm, cudaManager, level, streamIndex); ////////////////////////////////////////////////////////////////////////// @@ -264,6 +253,11 @@ void exchangeMultiGPU(Parameter *para, vf::gpu::Communicator *comm, CudaMemoryMa //exchangePostCollDataGPU27(para, comm, level); } } +void exchangeMultiGPUAfterFtoC(Parameter *para, vf::gpu::Communicator *comm, CudaMemoryManager *cudaManager, int level, + int streamIndex) +{ + exchangeMultiGPU(para, comm, cudaManager, level, streamIndex, para->useReducedCommunicationAfterFtoC); +} void postCollisionBC(Parameter* para, int level, unsigned int t) { diff --git a/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.h b/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.h index 20da69d32..a06e58ae6 100644 --- a/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.h +++ b/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.h @@ -26,10 +26,12 @@ extern "C" void collisionPorousMedia(Parameter* para, std::vector<std::shared_pt extern "C" void collisionAdvectionDiffusion(Parameter* para, int level); -extern "C" void prepareExchangeMultiGPU(Parameter *para, int level, int streamIndex); 
+extern "C" void prepareExchangeMultiGPU(Parameter *para, int level, int streamIndex, bool useReducedCommunicationAfterFtoC = false); extern "C" void prepareExchangeMultiGPUAfterFtoC(Parameter *para, int level, int streamIndex); extern "C" void exchangeMultiGPU(Parameter *para, vf::gpu::Communicator *comm, CudaMemoryManager *cudaManager, + int level, int streamIndex, bool useReducedCommunicationAfterFtoC = false); +extern "C" void exchangeMultiGPUAfterFtoC(Parameter *para, vf::gpu::Communicator *comm, CudaMemoryManager *cudaManager, int level, int streamIndex); extern "C" void postCollisionBC(Parameter* para, int level, unsigned int t); diff --git a/src/gpu/VirtualFluids_GPU/Communication/ExchangeData27.cpp b/src/gpu/VirtualFluids_GPU/Communication/ExchangeData27.cpp index 0bf106248..f19b283d6 100644 --- a/src/gpu/VirtualFluids_GPU/Communication/ExchangeData27.cpp +++ b/src/gpu/VirtualFluids_GPU/Communication/ExchangeData27.cpp @@ -98,7 +98,7 @@ void prepareExchangeCollDataYGPU27(Parameter *para, int level, int streamIndex, { cudaStream_t stream = (streamIndex == -1) ? CU_STREAM_LEGACY : para->getStreamManager()->getStream(streamIndex); std::vector<ProcessNeighbor27> *sendProcessNeighbor = - getSendProcessNeighborY(useReducedCommunicationAfterFtoC, para, level); + getSendProcessNeighborDevY(useReducedCommunicationAfterFtoC, para, level); for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsY(level, "send")); i++) GetSendFsPostDev27(para->getParD(level)->d0SP.f[0], @@ -118,23 +118,26 @@ void exchangeCollDataYGPU27(Parameter *para, vf::gpu::Communicator *comm, CudaMe int streamIndex, bool useReducedCommunicationAfterFtoC) { cudaStream_t stream = (streamIndex == -1) ? 
CU_STREAM_LEGACY : para->getStreamManager()->getStream(streamIndex); - std::vector<ProcessNeighbor27> *sendProcessNeighbor = - getSendProcessNeighborY(useReducedCommunicationAfterFtoC, para, level); - std::vector<ProcessNeighbor27> *recvProcessNeighbor = - getRecvProcessNeighborY(useReducedCommunicationAfterFtoC, para, level); + std::vector<ProcessNeighbor27> *sendProcessNeighborDev = + getSendProcessNeighborDevY(useReducedCommunicationAfterFtoC, para, level); + std::vector<ProcessNeighbor27> *recvProcessNeighborDev = + getRecvProcessNeighborDevY(useReducedCommunicationAfterFtoC, para, level); + std::vector<ProcessNeighbor27> *sendProcessNeighborHost = + getSendProcessNeighborHostY(useReducedCommunicationAfterFtoC, para, level); + std::vector<ProcessNeighbor27> *recvProcessNeighborHost = + getRecvProcessNeighborHostY(useReducedCommunicationAfterFtoC, para, level); /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// //copy Device to Host for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsY(level, "send")); i++) - cudaManager->cudaCopyProcessNeighborYFsDH(level, i, streamIndex); - // todo: vorher pointer auf para->getParD(level)->sendProcessNeighborY[i].f[0] für sendProcessNeighborsAfterFtoCY übernehmen + cudaManager->cudaCopyProcessNeighborYFsDH(level, i, (*sendProcessNeighborDev)[i].memsizeFs, streamIndex); /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// //start non blocking MPI receive for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsY(level, "send")); i++) { comm->nbRecvDataGPU(para->getParH(level)->recvProcessNeighborY[i].f[0], - para->getParH(level)->recvProcessNeighborY[i].numberOfFs, + (*recvProcessNeighborHost)[i].numberOfFs, para->getParH(level)->recvProcessNeighborY[i].rankNeighbor); } 
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// @@ -156,8 +159,8 @@ void exchangeCollDataYGPU27(Parameter *para, vf::gpu::Communicator *comm, CudaMe //start blocking MPI send for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsY(level, "send")); i++) { - comm->sendDataGPU(para->getParH(level)->sendProcessNeighborY[i].f[0], - para->getParH(level)->sendProcessNeighborY[i].numberOfFs, + comm->sendDataGPU(para->getParH(level)->sendProcessNeighborY[i].f[0], + (*sendProcessNeighborHost)[i].numberOfFs, para->getParH(level)->sendProcessNeighborY[i].rankNeighbor); } /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// @@ -176,13 +179,13 @@ void exchangeCollDataYGPU27(Parameter *para, vf::gpu::Communicator *comm, CudaMe //copy Host to Device for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsY(level, "send")); i++) { - cudaManager->cudaCopyProcessNeighborYFsHD(level, i, streamIndex); + cudaManager->cudaCopyProcessNeighborYFsHD(level, i, (*recvProcessNeighborDev)[i].memsizeFs, streamIndex); ////////////////////////////////////////////////////////////////////////// SetRecvFsPostDev27(para->getParD(level)->d0SP.f[0], para->getParD(level)->recvProcessNeighborY[i].f[0], para->getParD(level)->recvProcessNeighborY[i].index, - para->getParD(level)->recvProcessNeighborY[i].numberOfNodes, - para->getParD(level)->neighborX_SP, + (*recvProcessNeighborDev)[i].numberOfNodes, + para->getParD(level)->neighborX_SP, para->getParD(level)->neighborY_SP, para->getParD(level)->neighborZ_SP, para->getParD(level)->size_Mat_SP, @@ -193,7 +196,7 @@ void exchangeCollDataYGPU27(Parameter *para, vf::gpu::Communicator *comm, CudaMe /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// } -std::vector<ProcessNeighbor27> 
*getSendProcessNeighborY(bool useReducedCommunicationAfterFtoC, Parameter *para, +std::vector<ProcessNeighbor27> *getSendProcessNeighborDevY(bool useReducedCommunicationAfterFtoC, Parameter *para, int level) { if (useReducedCommunicationAfterFtoC) @@ -202,7 +205,7 @@ std::vector<ProcessNeighbor27> *getSendProcessNeighborY(bool useReducedCommunica return &para->getParD(level)->sendProcessNeighborY; } -std::vector<ProcessNeighbor27> *getRecvProcessNeighborY(bool useReducedCommunicationAfterFtoC, Parameter *para, +std::vector<ProcessNeighbor27> *getRecvProcessNeighborDevY(bool useReducedCommunicationAfterFtoC, Parameter *para, int level) { if (useReducedCommunicationAfterFtoC) @@ -210,6 +213,23 @@ std::vector<ProcessNeighbor27> *getRecvProcessNeighborY(bool useReducedCommunica else return &para->getParD(level)->recvProcessNeighborY; } +std::vector<ProcessNeighbor27> *getSendProcessNeighborHostY(bool useReducedCommunicationAfterFtoC, Parameter *para, + int level) +{ + if (useReducedCommunicationAfterFtoC) + return &para->getParH(level)->sendProcessNeighborsAfterFtoCY; + else + return &para->getParH(level)->sendProcessNeighborY; +} + +std::vector<ProcessNeighbor27> *getRecvProcessNeighborHostY(bool useReducedCommunicationAfterFtoC, Parameter *para, + int level) +{ + if (useReducedCommunicationAfterFtoC) + return &para->getParH(level)->recvProcessNeighborsAfterFtoCY; + else + return &para->getParH(level)->recvProcessNeighborY; +} //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/src/gpu/VirtualFluids_GPU/Communication/ExchangeData27.h b/src/gpu/VirtualFluids_GPU/Communication/ExchangeData27.h index 887e4d2b3..6ecf1e287 100644 --- a/src/gpu/VirtualFluids_GPU/Communication/ExchangeData27.h +++ b/src/gpu/VirtualFluids_GPU/Communication/ExchangeData27.h @@ -18,8 +18,11 @@ extern "C" void prepareExchangeCollDataXGPU27(Parameter *para, int level, int st extern "C" void 
exchangeCollDataXGPU27(Parameter *para, vf::gpu::Communicator *comm, CudaMemoryManager *cudaManager, int level, int streamIndex); extern "C" void prepareExchangeCollDataYGPU27(Parameter *para, int level, int streamIndex, - bool useReducedCommunicationAfterFtoC = false); -std::vector<ProcessNeighbor27> *getSendProcessNeighborY(bool useReducedCommunicationAfterFtoC, Parameter *para, int level); + bool useReducedCommunicationAfterFtoC); +std::vector<ProcessNeighbor27> *getSendProcessNeighborDevY(bool useReducedCommunicationAfterFtoC, Parameter *para, int level); +std::vector<ProcessNeighbor27> *getRecvProcessNeighborDevY(bool useReducedCommunicationAfterFtoC, Parameter *para, int level); +std::vector<ProcessNeighbor27> *getSendProcessNeighborHostY(bool useReducedCommunicationAfterFtoC, Parameter *para, int level); +std::vector<ProcessNeighbor27> *getRecvProcessNeighborHostY(bool useReducedCommunicationAfterFtoC, Parameter *para, int level); extern "C" void exchangeCollDataYGPU27(Parameter *para, vf::gpu::Communicator *comm, CudaMemoryManager *cudaManager, int level, int streamIndex, bool useReducedCommunicationAfterFtoC = false); extern "C" void prepareExchangeCollDataZGPU27(Parameter *para, int level, int streamIndex); diff --git a/src/gpu/VirtualFluids_GPU/GPU/CudaMemoryManager.cpp b/src/gpu/VirtualFluids_GPU/GPU/CudaMemoryManager.cpp index a5e39aaad..d1f9eaaa0 100644 --- a/src/gpu/VirtualFluids_GPU/GPU/CudaMemoryManager.cpp +++ b/src/gpu/VirtualFluids_GPU/GPU/CudaMemoryManager.cpp @@ -578,32 +578,34 @@ void CudaMemoryManager::cudaCopyProcessNeighborYIndex(int lev, unsigned int proc parameter->getParH(lev)->recvProcessNeighborY[processNeighbor].memsizeIndex, cudaMemcpyHostToDevice)); } -void CudaMemoryManager::cudaCopyProcessNeighborYFsHD(int lev, unsigned int processNeighbor, int streamIndex) +void CudaMemoryManager::cudaCopyProcessNeighborYFsHD(int lev, unsigned int processNeighbor, const unsigned int &memsizeFsRecv, + int streamIndex) { if (streamIndex == -1) 
checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->recvProcessNeighborY[processNeighbor].f[0], parameter->getParH(lev)->recvProcessNeighborY[processNeighbor].f[0], - parameter->getD3Qxx() * parameter->getParD(lev)->recvProcessNeighborY[processNeighbor].memsizeFs, + parameter->getD3Qxx() * memsizeFsRecv, cudaMemcpyHostToDevice)); else checkCudaErrors(cudaMemcpyAsync(parameter->getParD(lev)->recvProcessNeighborY[processNeighbor].f[0], parameter->getParH(lev)->recvProcessNeighborY[processNeighbor].f[0], - parameter->getD3Qxx() * parameter->getParD(lev)->recvProcessNeighborY[processNeighbor].memsizeFs, + parameter->getD3Qxx() * memsizeFsRecv, cudaMemcpyHostToDevice, parameter->getStreamManager()->getStream(streamIndex))); } -void CudaMemoryManager::cudaCopyProcessNeighborYFsDH(int lev, unsigned int processNeighbor, int streamIndex) +void CudaMemoryManager::cudaCopyProcessNeighborYFsDH(int lev, unsigned int processNeighbor, const unsigned int &memsizeFsSend, + int streamIndex) { if (streamIndex == -1) checkCudaErrors( cudaMemcpy(parameter->getParH(lev)->sendProcessNeighborY[processNeighbor].f[0], parameter->getParD(lev)->sendProcessNeighborY[processNeighbor].f[0], - parameter->getD3Qxx() * parameter->getParD(lev)->sendProcessNeighborY[processNeighbor].memsizeFs, + parameter->getD3Qxx() * memsizeFsSend, cudaMemcpyDeviceToHost)); else checkCudaErrors( cudaMemcpyAsync(parameter->getParH(lev)->sendProcessNeighborY[processNeighbor].f[0], parameter->getParD(lev)->sendProcessNeighborY[processNeighbor].f[0], - parameter->getD3Qxx() * parameter->getParD(lev)->sendProcessNeighborY[processNeighbor].memsizeFs, + parameter->getD3Qxx() * memsizeFsSend, cudaMemcpyDeviceToHost, parameter->getStreamManager()->getStream(streamIndex))); } void CudaMemoryManager::cudaFreeProcessNeighborY(int lev, unsigned int processNeighbor) diff --git a/src/gpu/VirtualFluids_GPU/GPU/CudaMemoryManager.h b/src/gpu/VirtualFluids_GPU/GPU/CudaMemoryManager.h index 718cccbbb..0f8008ae4 100644 --- 
a/src/gpu/VirtualFluids_GPU/GPU/CudaMemoryManager.h +++ b/src/gpu/VirtualFluids_GPU/GPU/CudaMemoryManager.h @@ -95,16 +95,19 @@ public: void cudaFreeProcessNeighborX(int lev, unsigned int processNeighbor); // void cudaAllocProcessNeighborY(int lev, unsigned int processNeighbor); - void cudaCopyProcessNeighborYFsHD(int lev, unsigned int processNeighbor, int streamIndex); - void cudaCopyProcessNeighborYFsDH(int lev, unsigned int processNeighbor, int streamIndex); + void cudaCopyProcessNeighborYFsHD(int lev, unsigned int processNeighbor, const unsigned int &memsizeFsRecv, + int streamIndex); + void cudaCopyProcessNeighborYFsDH(int lev, unsigned int processNeighbor, const unsigned int &memsizeFsSend, + int streamIndex); void cudaCopyProcessNeighborYIndex(int lev, unsigned int processNeighbor); - void cudaFreeProcessNeighborY(int lev, unsigned int processNeighbor); + void cudaFreeProcessNeighborY(int lev, unsigned int processNeighbor); // void cudaAllocProcessNeighborZ(int lev, unsigned int processNeighbor); void cudaCopyProcessNeighborZFsHD(int lev, unsigned int processNeighbor, int streamIndex); void cudaCopyProcessNeighborZFsDH(int lev, unsigned int processNeighbor, int streamIndex); void cudaCopyProcessNeighborZIndex(int lev, unsigned int processNeighbor); void cudaFreeProcessNeighborZ(int lev, unsigned int processNeighbor); + ////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////// diff --git a/src/gpu/VirtualFluids_GPU/Parameter/Parameter.cpp b/src/gpu/VirtualFluids_GPU/Parameter/Parameter.cpp index 88531ea38..78b065bff 100644 --- a/src/gpu/VirtualFluids_GPU/Parameter/Parameter.cpp +++ b/src/gpu/VirtualFluids_GPU/Parameter/Parameter.cpp @@ -1438,6 +1438,8 @@ void Parameter::setSendProcessNeighborsAfterFtoCX(int numberOfNodes, int level, this->getParD(level)->sendProcessNeighborsAfterFtoCX[arrayIndex].numberOfNodes = numberOfNodes; 
this->getParH(level)->sendProcessNeighborsAfterFtoCX[arrayIndex].memsizeFs = sizeof(real) * numberOfNodes; this->getParD(level)->sendProcessNeighborsAfterFtoCX[arrayIndex].memsizeFs = sizeof(real) * numberOfNodes; + this->getParH(level)->sendProcessNeighborsAfterFtoCX[arrayIndex].numberOfFs = this->D3Qxx * numberOfNodes; + this->getParD(level)->sendProcessNeighborsAfterFtoCX[arrayIndex].numberOfFs = this->D3Qxx * numberOfNodes; } void Parameter::setSendProcessNeighborsAfterFtoCY(int numberOfNodes, int level, int arrayIndex) { @@ -1445,6 +1447,8 @@ void Parameter::setSendProcessNeighborsAfterFtoCY(int numberOfNodes, int level, this->getParD(level)->sendProcessNeighborsAfterFtoCY[arrayIndex].numberOfNodes = numberOfNodes; this->getParH(level)->sendProcessNeighborsAfterFtoCY[arrayIndex].memsizeFs = sizeof(real) * numberOfNodes; this->getParD(level)->sendProcessNeighborsAfterFtoCY[arrayIndex].memsizeFs = sizeof(real) * numberOfNodes; + this->getParH(level)->sendProcessNeighborsAfterFtoCY[arrayIndex].numberOfFs = this->D3Qxx * numberOfNodes; + this->getParD(level)->sendProcessNeighborsAfterFtoCY[arrayIndex].numberOfFs = this->D3Qxx * numberOfNodes; } void Parameter::setSendProcessNeighborsAfterFtoCZ(int numberOfNodes, int level, int arrayIndex) { @@ -1452,6 +1456,8 @@ void Parameter::setSendProcessNeighborsAfterFtoCZ(int numberOfNodes, int level, this->getParD(level)->sendProcessNeighborsAfterFtoCZ[arrayIndex].numberOfNodes = numberOfNodes; this->getParH(level)->sendProcessNeighborsAfterFtoCZ[arrayIndex].memsizeFs = sizeof(real) * numberOfNodes; this->getParD(level)->sendProcessNeighborsAfterFtoCZ[arrayIndex].memsizeFs = sizeof(real) * numberOfNodes; + this->getParH(level)->sendProcessNeighborsAfterFtoCZ[arrayIndex].numberOfFs = this->D3Qxx * numberOfNodes; + this->getParD(level)->sendProcessNeighborsAfterFtoCZ[arrayIndex].numberOfFs = this->D3Qxx * numberOfNodes; } void Parameter::setRecvProcessNeighborsAfterFtoCX(int numberOfNodes, int level, int arrayIndex) { @@ 
-1459,6 +1465,8 @@ void Parameter::setRecvProcessNeighborsAfterFtoCX(int numberOfNodes, int level, this->getParD(level)->recvProcessNeighborsAfterFtoCX[arrayIndex].numberOfNodes = numberOfNodes; this->getParH(level)->recvProcessNeighborsAfterFtoCX[arrayIndex].memsizeFs = sizeof(real) * numberOfNodes; this->getParD(level)->recvProcessNeighborsAfterFtoCX[arrayIndex].memsizeFs = sizeof(real) * numberOfNodes; + this->getParH(level)->recvProcessNeighborsAfterFtoCX[arrayIndex].numberOfFs = this->D3Qxx * numberOfNodes; + this->getParD(level)->recvProcessNeighborsAfterFtoCX[arrayIndex].numberOfFs = this->D3Qxx * numberOfNodes; } void Parameter::setRecvProcessNeighborsAfterFtoCY(int numberOfNodes, int level, int arrayIndex) { @@ -1466,6 +1474,8 @@ void Parameter::setRecvProcessNeighborsAfterFtoCY(int numberOfNodes, int level, this->getParD(level)->recvProcessNeighborsAfterFtoCY[arrayIndex].numberOfNodes = numberOfNodes; this->getParH(level)->recvProcessNeighborsAfterFtoCY[arrayIndex].memsizeFs = sizeof(real) * numberOfNodes; this->getParD(level)->recvProcessNeighborsAfterFtoCY[arrayIndex].memsizeFs = sizeof(real) * numberOfNodes; + this->getParH(level)->recvProcessNeighborsAfterFtoCY[arrayIndex].numberOfFs = this->D3Qxx * numberOfNodes; + this->getParD(level)->recvProcessNeighborsAfterFtoCY[arrayIndex].numberOfFs = this->D3Qxx * numberOfNodes; } void Parameter::setRecvProcessNeighborsAfterFtoCZ(int numberOfNodes, int level, int arrayIndex) { @@ -1473,6 +1483,8 @@ void Parameter::setRecvProcessNeighborsAfterFtoCZ(int numberOfNodes, int level, this->getParD(level)->recvProcessNeighborsAfterFtoCZ[arrayIndex].numberOfNodes = numberOfNodes; this->getParH(level)->recvProcessNeighborsAfterFtoCZ[arrayIndex].memsizeFs = sizeof(real) * numberOfNodes; this->getParD(level)->recvProcessNeighborsAfterFtoCZ[arrayIndex].memsizeFs = sizeof(real) * numberOfNodes; + this->getParH(level)->recvProcessNeighborsAfterFtoCZ[arrayIndex].numberOfFs = this->D3Qxx * numberOfNodes; + 
this->getParD(level)->recvProcessNeighborsAfterFtoCZ[arrayIndex].numberOfFs = this->D3Qxx * numberOfNodes; } void Parameter::setgeomBoundaryNormalX(std::string geomNormalX) { -- GitLab