Skip to content
Snippets Groups Projects
Commit 9cc23359 authored by Anna Wellmann
Browse files

Prepare prepareExchangeCollDataYGPU27 to use reduced number of send indices

parent ed4e44ab
No related branches found
No related tags found
1 merge request: !104 "Add Communication Hiding to GPU version"
......@@ -86,9 +86,10 @@ void updateGrid27(Parameter *para, vf::gpu::Communicator *comm, CudaMemoryManage
if (para->useReducedCommunicationAfterFtoC) {
prepareExchangeMultiGPU(para, level, -1); // TODO
exchangeMultiGPU(para, comm, cudaManager, level, -1); // TODO
} else {
prepareExchangeMultiGPU(para, level, -1);
exchangeMultiGPU(para, comm, cudaManager, level, -1);
}
prepareExchangeMultiGPU(para, level, -1);
exchangeMultiGPU(para, comm, cudaManager, level, -1);
coarseToFine(para, level);
} else {
......@@ -216,6 +217,14 @@ void prepareExchangeMultiGPU(Parameter *para, int level, int streamIndex)
prepareExchangeCollDataXGPU27(para, level, streamIndex);
prepareExchangeCollDataYGPU27(para, level, streamIndex);
prepareExchangeCollDataZGPU27(para, level, streamIndex);
}
}
/// Prepares the X, Y and Z send data for the multi-GPU exchange that follows the
/// fine-to-coarse step.
///
/// Does nothing unless more than one process takes part (getNumprocs() > 1).
/// Only the Y direction is asked to use the reduced communication set here;
/// X and Z are called with their default arguments.
///
/// @param para         simulation parameters, forwarded to the per-direction helpers
/// @param level        grid level, forwarded unchanged
/// @param streamIndex  stream index, forwarded unchanged
void prepareExchangeMultiGPUAfterFtoC(Parameter *para, int level, int streamIndex)
{
    // Nothing to prepare when there is no more than one process.
    const bool hasSeveralProcesses = para->getNumprocs() > 1;
    if (!hasSeveralProcesses)
        return;

    // Named flag instead of a bare `true` at the call site.
    const bool reducedSendSetAfterFtoC = true;

    prepareExchangeCollDataXGPU27(para, level, streamIndex);
    prepareExchangeCollDataYGPU27(para, level, streamIndex, reducedSendSetAfterFtoC);
    prepareExchangeCollDataZGPU27(para, level, streamIndex);
}
......
......@@ -27,6 +27,7 @@ extern "C" void collisionPorousMedia(Parameter* para, std::vector<std::shared_pt
// Advection-diffusion collision for one grid level (NOTE(review): purpose inferred from the name only — confirm).
extern "C" void collisionAdvectionDiffusion(Parameter* para, int level);
// Prepares the X, Y and Z send data for the multi-GPU exchange on the given level and stream.
extern "C" void prepareExchangeMultiGPU(Parameter *para, int level, int streamIndex);
// Same preparation, but the Y direction is requested with the reduced send set used after fine-to-coarse.
extern "C" void prepareExchangeMultiGPUAfterFtoC(Parameter *para, int level, int streamIndex);
// Performs the multi-GPU exchange itself (NOTE(review): definition not visible here — confirm details).
extern "C" void exchangeMultiGPU(Parameter *para, vf::gpu::Communicator *comm, CudaMemoryManager *cudaManager,
int level, int streamIndex);
......
......@@ -108,14 +108,20 @@ void exchangeCollDataXGPU27(Parameter *para, vf::gpu::Communicator *comm, CudaMe
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Y
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
void prepareExchangeCollDataYGPU27(Parameter *para, int level, int streamIndex)
void prepareExchangeCollDataYGPU27(Parameter *para, int level, int streamIndex, bool useReducedCommunicationAfterFtoC)
{
cudaStream_t stream = (streamIndex == -1) ? CU_STREAM_LEGACY : para->getStreamManager()->getStream(streamIndex);
std::vector<ProcessNeighbor27> *sendProcessNeighbor;
if (useReducedCommunicationAfterFtoC)
sendProcessNeighbor = &para->getParD(level)->sendProcessNeighborsAfterFtoCY;
else
sendProcessNeighbor = &para->getParD(level)->sendProcessNeighborY;
for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsY(level, "send")); i++)
GetSendFsPostDev27(para->getParD(level)->d0SP.f[0],
para->getParD(level)->sendProcessNeighborY[i].f[0],
para->getParD(level)->sendProcessNeighborY[i].index,
para->getParD(level)->sendProcessNeighborY[i].numberOfNodes,
(*sendProcessNeighbor)[i].numberOfNodes,
para->getParD(level)->neighborX_SP,
para->getParD(level)->neighborY_SP,
para->getParD(level)->neighborZ_SP,
......@@ -133,6 +139,7 @@ void exchangeCollDataYGPU27(Parameter *para, vf::gpu::Communicator *comm, CudaMe
//copy Device to Host
for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsY(level, "send")); i++)
cudaManager->cudaCopyProcessNeighborYFsDH(level, i, streamIndex);
// TODO: beforehand, take over the pointer to para->getParD(level)->sendProcessNeighborY[i].f[0] for sendProcessNeighborsAfterFtoCY
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
//start non blocking MPI receive
......
......@@ -17,7 +17,7 @@ extern "C" void exchangePostCollDataGPU27(Parameter* para, vf::gpu::Communicator
extern "C" void prepareExchangeCollDataXGPU27(Parameter *para, int level, int streamIndex);
extern "C" void exchangeCollDataXGPU27(Parameter *para, vf::gpu::Communicator *comm, CudaMemoryManager *cudaManager,
int level, int streamIndex);
extern "C" void prepareExchangeCollDataYGPU27(Parameter *para, int level, int streamIndex);
extern "C" void prepareExchangeCollDataYGPU27(Parameter *para, int level, int streamIndex, bool useReducedCommunicationAfterFtoC = false);
extern "C" void exchangeCollDataYGPU27(Parameter *para, vf::gpu::Communicator *comm, CudaMemoryManager *cudaManager,
int level, int streamIndex);
extern "C" void prepareExchangeCollDataZGPU27(Parameter *para, int level, int streamIndex);
......
......@@ -235,11 +235,6 @@ typedef struct PN27{
int numberOfFs;
}ProcessNeighbor27;
// Minimal per-neighbor record for the communication after fine-to-coarse:
// it carries only a node count and the matching buffer size.
// Used as the element type of the send/recvProcessNeighborsAfterFtoC{X,Y,Z} vectors.
typedef struct PN27AfterFtoC {
// Size in bytes; Parameter::setSendProcessNeighborsAfterFtoCX stores sizeof(real) * numberOfNodes here.
uint memsizeFs;
// Number of nodes for this process neighbor, as passed to the setter.
int numberOfNodes;
} ProcessNeighbor27AfterFtoC;
typedef struct PN_F3 {
real* g[6];
uint memsizeGs;
......
......@@ -1436,7 +1436,7 @@ void Parameter::setIsNeighborZ(bool isNeigbor)
/// Stores the reduced number of send nodes (X direction, after fine-to-coarse)
/// for one process neighbor, on both the host and the device parameter sets.
///
/// The matching buffer size is recorded alongside it as
/// sizeof(real) * numberOfNodes.
///
/// @param numberOfNodes  number of nodes to send to this neighbor
/// @param level          grid level whose parameter sets are updated
/// @param arrayIndex     index of the process neighbor within
///                       sendProcessNeighborsAfterFtoCX; it is not range-checked,
///                       so the caller must pass a valid index
void Parameter::setSendProcessNeighborsAfterFtoCX(int numberOfNodes, int level, int arrayIndex) {
    // Computed once; the original assigned the host value twice with the same expression.
    const auto memsizeFs = sizeof(real) * numberOfNodes;

    auto &hostNeighbor   = this->getParH(level)->sendProcessNeighborsAfterFtoCX[arrayIndex];
    auto &deviceNeighbor = this->getParD(level)->sendProcessNeighborsAfterFtoCX[arrayIndex];

    hostNeighbor.numberOfNodes   = numberOfNodes;
    deviceNeighbor.numberOfNodes = numberOfNodes;
    hostNeighbor.memsizeFs       = memsizeFs;
    deviceNeighbor.memsizeFs     = memsizeFs;
}
void Parameter::setSendProcessNeighborsAfterFtoCY(int numberOfNodes, int level, int arrayIndex)
......
......@@ -294,12 +294,12 @@ struct LBMSimulationParameter
std::vector<ProcessNeighbor27> recvProcessNeighborY;
std::vector<ProcessNeighbor27> recvProcessNeighborZ;
std::vector<ProcessNeighbor27AfterFtoC> sendProcessNeighborsAfterFtoCX;
std::vector<ProcessNeighbor27AfterFtoC> sendProcessNeighborsAfterFtoCY;
std::vector<ProcessNeighbor27AfterFtoC> sendProcessNeighborsAfterFtoCZ;
std::vector<ProcessNeighbor27AfterFtoC> recvProcessNeighborsAfterFtoCX;
std::vector<ProcessNeighbor27AfterFtoC> recvProcessNeighborsAfterFtoCY;
std::vector<ProcessNeighbor27AfterFtoC> recvProcessNeighborsAfterFtoCZ;
std::vector<ProcessNeighbor27> sendProcessNeighborsAfterFtoCX;
std::vector<ProcessNeighbor27> sendProcessNeighborsAfterFtoCY;
std::vector<ProcessNeighbor27> sendProcessNeighborsAfterFtoCZ;
std::vector<ProcessNeighbor27> recvProcessNeighborsAfterFtoCX;
std::vector<ProcessNeighbor27> recvProcessNeighborsAfterFtoCY;
std::vector<ProcessNeighbor27> recvProcessNeighborsAfterFtoCZ;
///////////////////////////////////////////////////////
// 3D domain decomposition convection diffusion
std::vector<ProcessNeighbor27> sendProcessNeighborADX;
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment