Skip to content
Snippets Groups Projects
Commit 7da5ad73, authored by Anna Wellmann, committed by Anna Wellmann
Browse files

Do not use the edge node routine on the CPU in the version without streams

(cherry picked from commit 520f1db1)
parent 7357f298
No related branches found
No related tags found
1 merge request: !104 "Add Communication Hiding to GPU version"
...@@ -87,8 +87,7 @@ void UpdateGrid27::refinementAndExchange_noStreams_onlyExchangeInterface(int lev ...@@ -87,8 +87,7 @@ void UpdateGrid27::refinementAndExchange_noStreams_onlyExchangeInterface(int lev
{ {
fineToCoarse(para.get(), level); fineToCoarse(para.get(), level);
prepareExchangeMultiGPUAfterFtoC(para.get(), level, -1); exchangeMultiGPU_noStreams_withPrepare(para.get(), comm, cudaManager.get(), level, true);
exchangeMultiGPUAfterFtoC(para.get(), comm, cudaManager.get(), level, -1);
coarseToFine(para.get(), level); coarseToFine(para.get(), level);
} }
...@@ -97,8 +96,7 @@ void UpdateGrid27::refinementAndExchange_noStreams_completeExchange(int level) ...@@ -97,8 +96,7 @@ void UpdateGrid27::refinementAndExchange_noStreams_completeExchange(int level)
{ {
fineToCoarse(para.get(), level); fineToCoarse(para.get(), level);
prepareExchangeMultiGPU(para.get(), level, -1); exchangeMultiGPU_noStreams_withPrepare(para.get(), comm, cudaManager.get(), level, false);
exchangeMultiGPU(para.get(), comm, cudaManager.get(), level, -1);
coarseToFine(para.get(), level); coarseToFine(para.get(), level);
} }
...@@ -113,15 +111,13 @@ void UpdateGrid27::collisionAndExchange_noStreams_indexKernel(int level, unsigne ...@@ -113,15 +111,13 @@ void UpdateGrid27::collisionAndExchange_noStreams_indexKernel(int level, unsigne
{ {
collisionUsingIndex(para.get(), pm, level, t, kernels, para->getParD(level)->fluidNodeIndices, collisionUsingIndex(para.get(), pm, level, t, kernels, para->getParD(level)->fluidNodeIndices,
para->getParD(level)->numberOfFluidNodes, -1); para->getParD(level)->numberOfFluidNodes, -1);
prepareExchangeMultiGPU(para.get(), level, -1); exchangeMultiGPU_noStreams_withPrepare(para.get(), comm, cudaManager.get(), level, false);
exchangeMultiGPU(para.get(), comm, cudaManager.get(), level, -1);
} }
void UpdateGrid27::collisionAndExchange_noStreams_oldKernel(int level, unsigned int t) void UpdateGrid27::collisionAndExchange_noStreams_oldKernel(int level, unsigned int t)
{ {
collision(para.get(), pm, level, t, kernels); collision(para.get(), pm, level, t, kernels);
prepareExchangeMultiGPU(para.get(), level, -1); exchangeMultiGPU_noStreams_withPrepare(para.get(), comm, cudaManager.get(), level, false);
exchangeMultiGPU(para.get(), comm, cudaManager.get(), level, -1);
} }
void UpdateGrid27::collisionAndExchange_streams(int level, unsigned int t) void UpdateGrid27::collisionAndExchange_streams(int level, unsigned int t)
...@@ -300,6 +296,48 @@ void exchangeMultiGPU(Parameter *para, vf::gpu::Communicator *comm, CudaMemoryMa ...@@ -300,6 +296,48 @@ void exchangeMultiGPU(Parameter *para, vf::gpu::Communicator *comm, CudaMemoryMa
// 1D domain decomposition // 1D domain decomposition
// exchangePostCollDataGPU27(para, comm, level); // exchangePostCollDataGPU27(para, comm, level);
} }
/// Multi-GPU exchange for the code path without streams, with the prepare step included.
///
/// For each direction, in the order X, Y, Z, the sequence prepare -> exchange -> scatter
/// is executed. Afterwards, if para->getDiffOn() is set, the advection-diffusion
/// post-collision exchange is run in X, Y and Z.
///
/// @param para           parameter object, forwarded to every call
/// @param comm           communicator, forwarded to the exchange calls
/// @param cudaManager    memory manager, forwarded to the exchange calls
/// @param level          grid level, forwarded to every call
/// @param useReducedComm true selects the "AfterFtoC" routines, false the "AllNodes" routines
void exchangeMultiGPU_noStreams_withPrepare(Parameter *para, vf::gpu::Communicator *comm, CudaMemoryManager *cudaManager, int level, bool useReducedComm)
{
    // Trailing argument handed to every prepare/exchange/scatter call below.
    // NOTE(review): presumably the stream index, with -1 meaning "no stream" — confirm against the callee declarations.
    constexpr int noStream = -1;

    //////////////////////////////////////////////////////////////////////////
    // 3D domain decomposition
    if (!useReducedComm) {
        // exchange via the "AllNodes" routines
        // X
        prepareExchangeCollDataXGPU27AllNodes(para, level, noStream);
        exchangeCollDataXGPU27AllNodes(para, comm, cudaManager, level, noStream);
        scatterNodesFromRecvBufferXGPU27AllNodes(para, level, noStream);
        // Y
        prepareExchangeCollDataYGPU27AllNodes(para, level, noStream);
        exchangeCollDataYGPU27AllNodes(para, comm, cudaManager, level, noStream);
        scatterNodesFromRecvBufferYGPU27AllNodes(para, level, noStream);
        // Z
        prepareExchangeCollDataZGPU27AllNodes(para, level, noStream);
        exchangeCollDataZGPU27AllNodes(para, comm, cudaManager, level, noStream);
        scatterNodesFromRecvBufferZGPU27AllNodes(para, level, noStream);
    } else {
        // exchange via the "AfterFtoC" routines
        // X
        prepareExchangeCollDataXGPU27AfterFtoC(para, level, noStream);
        exchangeCollDataXGPU27AfterFtoC(para, comm, cudaManager, level, noStream);
        scatterNodesFromRecvBufferXGPU27AfterFtoC(para, level, noStream);
        // Y
        prepareExchangeCollDataYGPU27AfterFtoC(para, level, noStream);
        exchangeCollDataYGPU27AfterFtoC(para, comm, cudaManager, level, noStream);
        scatterNodesFromRecvBufferYGPU27AfterFtoC(para, level, noStream);
        // Z
        prepareExchangeCollDataZGPU27AfterFtoC(para, level, noStream);
        exchangeCollDataZGPU27AfterFtoC(para, comm, cudaManager, level, noStream);
        scatterNodesFromRecvBufferZGPU27AfterFtoC(para, level, noStream);
    }

    //////////////////////////////////////////////////////////////////////////
    // 3D domain decomposition convection diffusion
    if (!para->getDiffOn()) {
        return; // nothing further to exchange when convection diffusion is off
    }

    if (para->getUseStreams()) {
        std::cout << "Warning: Cuda streams not yet implemented for convection diffusion" << std::endl;
    }
    exchangePostCollDataADXGPU27(para, comm, cudaManager, level);
    exchangePostCollDataADYGPU27(para, comm, cudaManager, level);
    exchangePostCollDataADZGPU27(para, comm, cudaManager, level);
}
void exchangeMultiGPUAfterFtoC(Parameter *para, vf::gpu::Communicator *comm, CudaMemoryManager *cudaManager, int level, void exchangeMultiGPUAfterFtoC(Parameter *para, vf::gpu::Communicator *comm, CudaMemoryManager *cudaManager, int level,
int streamIndex) int streamIndex)
{ {
......
...@@ -65,6 +65,8 @@ extern "C" void exchangeMultiGPU(Parameter *para, vf::gpu::Communicator *comm, C ...@@ -65,6 +65,8 @@ extern "C" void exchangeMultiGPU(Parameter *para, vf::gpu::Communicator *comm, C
int level, int streamIndex); int level, int streamIndex);
extern "C" void exchangeMultiGPUAfterFtoC(Parameter *para, vf::gpu::Communicator *comm, CudaMemoryManager *cudaManager, extern "C" void exchangeMultiGPUAfterFtoC(Parameter *para, vf::gpu::Communicator *comm, CudaMemoryManager *cudaManager,
int level, int streamIndex); int level, int streamIndex);
// Exchange routine for the version without streams that also performs the prepare step.
// useReducedComm == true uses the "AfterFtoC" exchange routines, false uses the "AllNodes" routines.
extern "C" void exchangeMultiGPU_noStreams_withPrepare(Parameter *para, vf::gpu::Communicator *comm,
CudaMemoryManager *cudaManager, int level, bool useReducedComm);
extern "C" void postCollisionBC(Parameter* para, int level, unsigned int t); extern "C" void postCollisionBC(Parameter* para, int level, unsigned int t);
......
...@@ -220,7 +220,7 @@ void exchangeCollDataYGPU27(Parameter *para, vf::gpu::Communicator *comm, CudaMe ...@@ -220,7 +220,7 @@ void exchangeCollDataYGPU27(Parameter *para, vf::gpu::Communicator *comm, CudaMe
if (para->getUseStreams()) cudaStreamSynchronize(stream); if (para->getUseStreams()) cudaStreamSynchronize(stream);
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// edge nodes: copy received node values from x // edge nodes: copy received node values from x
if (para->getNumberOfProcessNeighborsX(level, "recv") > 0) { if (para->getUseStreams() && para->getNumberOfProcessNeighborsX(level, "recv") > 0) {
uint indexInSubdomainX = 0; uint indexInSubdomainX = 0;
uint indexInSubdomainY = 0; uint indexInSubdomainY = 0;
uint numNodesInBufferX = 0; uint numNodesInBufferX = 0;
...@@ -323,7 +323,7 @@ void exchangeCollDataZGPU27(Parameter *para, vf::gpu::Communicator *comm, CudaMe ...@@ -323,7 +323,7 @@ void exchangeCollDataZGPU27(Parameter *para, vf::gpu::Communicator *comm, CudaMe
if (para->getUseStreams()) cudaStreamSynchronize(stream); if (para->getUseStreams()) cudaStreamSynchronize(stream);
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// edge nodes: copy received node values from x // edge nodes: copy received node values from x
if (para->getNumberOfProcessNeighborsX(level, "recv") > 0) { if (para->getUseStreams() && para->getNumberOfProcessNeighborsX(level, "recv") > 0) {
uint indexInSubdomainX = 0; uint indexInSubdomainX = 0;
uint indexInSubdomainZ = 0; uint indexInSubdomainZ = 0;
uint numNodesInBufferX = 0; uint numNodesInBufferX = 0;
...@@ -343,7 +343,7 @@ void exchangeCollDataZGPU27(Parameter *para, vf::gpu::Communicator *comm, CudaMe ...@@ -343,7 +343,7 @@ void exchangeCollDataZGPU27(Parameter *para, vf::gpu::Communicator *comm, CudaMe
} }
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// edge nodes: copy received node values from y // edge nodes: copy received node values from y
if (para->getNumberOfProcessNeighborsY(level, "recv") > 0) { if (para->getUseStreams() && para->getNumberOfProcessNeighborsY(level, "recv") > 0) {
uint indexInSubdomainY = 0; uint indexInSubdomainY = 0;
uint indexInSubdomainZ = 0; uint indexInSubdomainZ = 0;
uint numNodesInBufferY = 0; uint numNodesInBufferY = 0;
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment