iRMB / VirtualFluids · Commits

Commit 9a018cb6, authored 2 years ago by Anna Wellmann

    Reformat ExchangeData27 and ExchangeData27Test

Parent: 440a31a2
No related branches or tags contain this commit.
Merge request: !104 "Add Communication Hiding to GPU version"

Showing 2 changed files with 75 additions and 69 deletions:
    src/gpu/VirtualFluids_GPU/Communication/ExchangeData27.cpp      (+72 −65)
    src/gpu/VirtualFluids_GPU/Communication/ExchangeData27Test.cpp  (+3 −4)
src/gpu/VirtualFluids_GPU/Communication/ExchangeData27.cpp (+72 −65)

#include "Communication/ExchangeData27.h"
#include "Parameter/CudaStreamManager.h"

#include <cuda_runtime.h>
#include <helper_cuda.h>

////////////////////////////////////////////////////////////////////////////////////////////////////
// 3D domain decomposition
////////////////////////////////////////////////////////////////////////////////////////////////////
// 3D domain decomposition: functions used by all directions
////////////////////////////////////////////////////////////////////////////////////////////////////
void collectNodesInSendBufferGPU(Parameter *para, int level, int streamIndex,
                                 std::vector<ProcessNeighbor27> *sendProcessNeighbor,
                                 unsigned int numberOfSendProcessNeighbors)
{
    cudaStream_t stream =
        (streamIndex == -1) ? CU_STREAM_LEGACY : para->getStreamManager()->getStream(streamIndex);
...
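The stream-selection line above is the idiom this reformat touches in every function: a streamIndex of -1 falls back to CUDA's legacy default stream, any other value picks a pre-created stream from the CudaStreamManager. A minimal self-contained sketch of the same fallback, assuming the runtime-API handle cudaStreamLegacy in place of the driver-API constant CU_STREAM_LEGACY (pickStream and pool are hypothetical names, not project API):

#include <cuda_runtime.h>

// Hypothetical stand-in for the pattern above: index -1 selects the
// legacy default stream, any other index a stream from a small pool
// (the project gets it from its CudaStreamManager instead).
static cudaStream_t pickStream(int streamIndex, cudaStream_t *pool)
{
    return (streamIndex == -1) ? cudaStreamLegacy : pool[streamIndex];
}

int main()
{
    cudaStream_t pool[1];
    cudaStreamCreate(&pool[0]);

    cudaStream_t legacy  = pickStream(-1, pool); // synchronizes with other blocking streams
    cudaStream_t created = pickStream(0, pool);  // independent stream, usable for overlap

    cudaStreamSynchronize(created);
    cudaStreamDestroy(pool[0]);
    (void)legacy;
    return 0;
}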
@@ -30,8 +30,8 @@ void collectNodesInSendBufferGPU(Parameter *para, int level, int streamIndex,
}

void scatterNodesFromRecvBufferGPU(Parameter *para, int level, int streamIndex,
                                   std::vector<ProcessNeighbor27> *recvProcessNeighborDev,
                                   unsigned int numberOfRecvProcessNeighbors)
{
    cudaStream_t stream =
        (streamIndex == -1) ? CU_STREAM_LEGACY : para->getStreamManager()->getStream(streamIndex);
    for (unsigned int i = 0; i < numberOfRecvProcessNeighbors; i++) {
...
@@ -69,16 +69,15 @@ void startNonBlockingMpiReceive(unsigned int numberOfSendProcessNeighbors, vf::g
    }
}

void copyEdgeNodes(std::vector<LBMSimulationParameter::EdgeNodePositions> &edgeNodes,
                   std::vector<ProcessNeighbor27> &recvProcessNeighborHost,
                   std::vector<ProcessNeighbor27> &sendProcessNeighborHost)
{
    int indexInSubdomainRecv = 0;
    int indexInSubdomainSend = 0;
    int numNodesInBufferRecv = 0;
    int numNodesInBufferSend = 0;

#pragma omp parallel for
    for (uint i = 0; i < edgeNodes.size(); i++) {
        indexInSubdomainRecv = edgeNodes[i].indexOfProcessNeighborRecv;
...

@@ -90,27 +89,28 @@ void copyEdgeNodes(std::vector<LBMSimulationParameter::EdgeNodePositions> &edgeN
            continue;
        }
        for (int direction = 0; direction <= (int)dirEND; direction++) {
            (sendProcessNeighborHost[indexInSubdomainSend].f[0] + (direction * numNodesInBufferSend))[edgeNodes[i].indexInSendBuffer] =
                (recvProcessNeighborHost[indexInSubdomainRecv].f[0] + (direction * numNodesInBufferRecv))[edgeNodes[i].indexInRecvBuffer];
        }
    }
}
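The inner loop of copyEdgeNodes relies on each neighbor's distribution buffer f[0] being one flat array laid out direction-major: the slice for direction d of a buffer holding numNodesInBuffer nodes starts at offset d * numNodesInBuffer, and the loop presumably covers all 27 directions of the D3Q27 stencil (direction <= dirEND). A standalone sketch of that indexing on plain host vectors, with illustrative names and sizes:

#include <cassert>
#include <vector>

int main()
{
    const int numDirections = 27; // D3Q27: direction runs from 0 to dirEND == 26
    const int numNodesRecv  = 8;
    const int numNodesSend  = 8;

    std::vector<double> recvBuffer(numDirections * numNodesRecv, 0.1);
    std::vector<double> sendBuffer(numDirections * numNodesSend, 0.0);

    // One edge node: the value at recv index 3 must land at send index 5,
    // once per direction slice, mirroring the loop in copyEdgeNodes.
    const int indexInRecvBuffer = 3;
    const int indexInSendBuffer = 5;
    for (int direction = 0; direction < numDirections; direction++) {
        (sendBuffer.data() + direction * numNodesSend)[indexInSendBuffer] =
            (recvBuffer.data() + direction * numNodesRecv)[indexInRecvBuffer];
    }

    // Spot-check a slice in the middle of the buffer.
    assert(sendBuffer[13 * numNodesSend + indexInSendBuffer] == 0.1);
    return 0;
}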
////////////////////////////////////////////////////////////////////////////////////////////////////
// X
////////////////////////////////////////////////////////////////////////////////////////////////////
void prepareExchangeCollDataXGPU27AllNodes(Parameter *para, int level, int streamIndex)
{
    collectNodesInSendBufferGPU(para, level, streamIndex, &para->getParD(level)->sendProcessNeighborX,
                                (unsigned int)(para->getNumberOfProcessNeighborsX(level, "send")));
}

void prepareExchangeCollDataXGPU27AfterFtoC(Parameter *para, int level, int streamIndex)
{
    collectNodesInSendBufferGPU(para, level, streamIndex, &para->getParD(level)->sendProcessNeighborsAfterFtoCX,
                                (unsigned int)(para->getNumberOfProcessNeighborsX(level, "send")));
}

void exchangeCollDataXGPU27AllNodes(Parameter *para, vf::gpu::Communicator *comm, CudaMemoryManager *cudaManager,
...
@@ -133,13 +133,13 @@ void exchangeCollDataXGPU27AfterFtoC(Parameter *para, vf::gpu::Communicator *com
                           &para->getParH(level)->recvProcessNeighborsAfterFtoCX);
}

void scatterNodesFromRecvBufferXGPU27AllNodes(Parameter *para, int level, int streamIndex)
{
    scatterNodesFromRecvBufferGPU(para, level, streamIndex, &para->getParD(level)->recvProcessNeighborX,
                                  (unsigned int)(para->getNumberOfProcessNeighborsX(level, "send")));
}

void scatterNodesFromRecvBufferXGPU27AfterFtoC(Parameter *para, int level, int streamIndex)
{
    scatterNodesFromRecvBufferGPU(para, level, streamIndex, &para->getParD(level)->recvProcessNeighborsAfterFtoCX,
                                  (unsigned int)(para->getNumberOfProcessNeighborsX(level, "send")));
...
@@ -154,7 +154,7 @@ void exchangeCollDataXGPU27(Parameter *para, vf::gpu::Communicator *comm, CudaMe
{
    cudaStream_t stream =
        (streamIndex == -1) ? CU_STREAM_LEGACY : para->getStreamManager()->getStream(streamIndex);
    ///////////////////////////////////////////////////////////////////////////
    // copy Device to Host
    for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsX(level, "send")); i++)
        cudaManager->cudaCopyProcessNeighborXFsDH(level, i, (*sendProcessNeighborDev)[i].memsizeFs, streamIndex);
...
@@ -166,35 +166,32 @@ void exchangeCollDataXGPU27(Parameter *para, vf::gpu::Communicator *comm, CudaMe
    ///////////////////////////////////////////////////////////////////////////
    startBlockingMpiSend((unsigned int)(*sendProcessNeighborHost).size(), comm, sendProcessNeighborHost);
    ///////////////////////////////////////////////////////////////////////////
    // wait
    for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsX(level, "send")); i++)
        comm->waitGPU(i);
    ///////////////////////////////////////////////////////////////////////////
    // reset the request array
    if (0 < (unsigned int)(para->getNumberOfProcessNeighborsX(level, "send")))
        comm->resetRequest();
    ///////////////////////////////////////////////////////////////////////////
    // copy Host to Device
    for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsX(level, "send")); i++)
        cudaManager->cudaCopyProcessNeighborXFsHD(level, i, (*recvProcessNeighborDev)[i].memsizeFs, streamIndex);
    ///////////////////////////////////////////////////////////////////////////
}
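Each per-axis exchange function follows the same choreography as exchangeCollDataXGPU27 above: stage the send buffers device-to-host on the chosen stream, post the non-blocking receives, make sure the staging copy has finished, do the blocking sends, wait on the receives, reset the requests, and stage the received buffers host-to-device. A stripped-down sketch of that sequence for a single neighbor, using raw MPI and CUDA calls instead of the project's Communicator and CudaMemoryManager wrappers (function and buffer names are made up; production code would use pinned host memory so the async copies can genuinely overlap):

#include <cuda_runtime.h>
#include <mpi.h>
#include <vector>

void exchangeWithNeighbor(double *devSend, double *devRecv, int count,
                          int neighborRank, cudaStream_t stream)
{
    std::vector<double> hostSend(count), hostRecv(count);

    // 1. Stage send data device -> host, asynchronously on the stream.
    //    (Pageable std::vector memory makes this effectively synchronous;
    //    pinned buffers would be needed for real overlap.)
    cudaMemcpyAsync(hostSend.data(), devSend, count * sizeof(double),
                    cudaMemcpyDeviceToHost, stream);

    // 2. Post the non-blocking receive early so it can overlap with staging.
    MPI_Request recvRequest;
    MPI_Irecv(hostRecv.data(), count, MPI_DOUBLE, neighborRank, 0,
              MPI_COMM_WORLD, &recvRequest);

    // 3. The send buffer must be complete before it goes on the wire.
    cudaStreamSynchronize(stream);

    // 4. Blocking send, then wait for the matching receive.
    MPI_Send(hostSend.data(), count, MPI_DOUBLE, neighborRank, 0, MPI_COMM_WORLD);
    MPI_Wait(&recvRequest, MPI_STATUS_IGNORE);

    // 5. Stage received data host -> device.
    cudaMemcpyAsync(devRecv, hostRecv.data(), count * sizeof(double),
                    cudaMemcpyHostToDevice, stream);
    cudaStreamSynchronize(stream);
}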
////////////////////////////////////////////////////////////////////////////////////////////////////
// Y
////////////////////////////////////////////////////////////////////////////////////////////////////
void prepareExchangeCollDataYGPU27AllNodes(Parameter *para, int level, int streamIndex)
{
    collectNodesInSendBufferGPU(para, level, streamIndex, &para->getParD(level)->sendProcessNeighborY,
                                (unsigned int)(para->getNumberOfProcessNeighborsY(level, "send")));
}

void prepareExchangeCollDataYGPU27AfterFtoC(Parameter *para, int level, int streamIndex)
{
    collectNodesInSendBufferGPU(para, level, streamIndex, &para->getParD(level)->sendProcessNeighborsAfterFtoCY,
                                (unsigned int)(para->getNumberOfProcessNeighborsY(level, "send")));
}

void exchangeCollDataYGPU27AllNodes(Parameter *para, vf::gpu::Communicator *comm, CudaMemoryManager *cudaManager,
...
@@ -206,7 +203,7 @@ void exchangeCollDataYGPU27AllNodes(Parameter *para, vf::gpu::Communicator *comm
                          &para->getParH(level)->sendProcessNeighborY,
                          &para->getParH(level)->recvProcessNeighborY);
}

void exchangeCollDataYGPU27AfterFtoC(Parameter *para, vf::gpu::Communicator *comm, CudaMemoryManager *cudaManager,
                                     int level, int streamIndex)
{
...
@@ -217,28 +214,27 @@ void exchangeCollDataYGPU27AfterFtoC(Parameter *para, vf::gpu::Communicator *com
                          &para->getParH(level)->recvProcessNeighborsAfterFtoCY);
}

void scatterNodesFromRecvBufferYGPU27AllNodes(Parameter *para, int level, int streamIndex)
{
    scatterNodesFromRecvBufferGPU(para, level, streamIndex, &para->getParD(level)->recvProcessNeighborY,
                                  (unsigned int)(para->getNumberOfProcessNeighborsY(level, "send")));
}

void scatterNodesFromRecvBufferYGPU27AfterFtoC(Parameter *para, int level, int streamIndex)
{
    scatterNodesFromRecvBufferGPU(para, level, streamIndex, &para->getParD(level)->recvProcessNeighborsAfterFtoCY,
                                  (unsigned int)(para->getNumberOfProcessNeighborsY(level, "send")));
}

void exchangeCollDataYGPU27(Parameter *para, vf::gpu::Communicator *comm, CudaMemoryManager *cudaManager,
                            int level, int streamIndex,
                            std::vector<ProcessNeighbor27> *sendProcessNeighborDev,
                            std::vector<ProcessNeighbor27> *recvProcessNeighborDev,
                            std::vector<ProcessNeighbor27> *sendProcessNeighborHost,
                            std::vector<ProcessNeighbor27> *recvProcessNeighborHost)
{
    cudaStream_t stream =
        (streamIndex == -1) ? CU_STREAM_LEGACY : para->getStreamManager()->getStream(streamIndex);
    ///////////////////////////////////////////////////////////////////////////
    // copy Device to Host
    for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsY(level, "send")); i++)
        cudaManager->cudaCopyProcessNeighborYFsDH(level, i, (*sendProcessNeighborDev)[i].memsizeFs, streamIndex);
...
@@ -246,49 +242,52 @@ void exchangeCollDataYGPU27(Parameter *para, vf::gpu::Communicator *comm, CudaMe
    startNonBlockingMpiReceive((unsigned int)(*sendProcessNeighborHost).size(), comm, recvProcessNeighborHost);
    ///////////////////////////////////////////////////////////////////////////
    // wait for memcopy device to host to finish before sending data
    if (para->getUseStreams())
        cudaStreamSynchronize(stream);
    ///////////////////////////////////////////////////////////////////////////
    // edge nodes: copy received node values from x
    if (para->getUseStreams() && para->getNumberOfProcessNeighborsX(level, "recv") > 0 &&
        para->getParH(level)->sendProcessNeighborY.size() != 0) {
        // check if in communication of all nodes (as opposed to reduced communication after fine to coarse)
        if (para->getParH(level)->sendProcessNeighborY[0].numberOfNodes == (*sendProcessNeighborHost)[0].numberOfNodes) {
            copyEdgeNodes(para->getParH(level)->edgeNodesXtoY, para->getParH(level)->recvProcessNeighborX,
                          *sendProcessNeighborHost);
        } else {
            copyEdgeNodes(para->getParH(level)->edgeNodesXtoY, para->getParH(level)->recvProcessNeighborsAfterFtoCX,
                          *sendProcessNeighborHost);
        }
    }
    ///////////////////////////////////////////////////////////////////////////
    startBlockingMpiSend((unsigned int)(*sendProcessNeighborHost).size(), comm, sendProcessNeighborHost);
    ///////////////////////////////////////////////////////////////////////////
    // wait
    for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsY(level, "send")); i++)
        comm->waitGPU(i);
    ///////////////////////////////////////////////////////////////////////////
    // reset the request array
    if (0 < (unsigned int)(para->getNumberOfProcessNeighborsY(level, "send")))
        comm->resetRequest();
    ///////////////////////////////////////////////////////////////////////////
    // copy Host to Device
    for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsY(level, "send")); i++) {
        cudaManager->cudaCopyProcessNeighborYFsHD(level, i, (*recvProcessNeighborDev)[i].memsizeFs, streamIndex);
    }
    ///////////////////////////////////////////////////////////////////////////
}
////////////////////////////////////////////////////////////////////////////////////////////////////
// Z
////////////////////////////////////////////////////////////////////////////////////////////////////
void prepareExchangeCollDataZGPU27AllNodes(Parameter *para, int level, int streamIndex)
{
    collectNodesInSendBufferGPU(para, level, streamIndex, &para->getParD(level)->sendProcessNeighborZ,
                                (unsigned int)(para->getNumberOfProcessNeighborsZ(level, "send")));
}

void prepareExchangeCollDataZGPU27AfterFtoC(Parameter *para, int level, int streamIndex)
{
    collectNodesInSendBufferGPU(para, level, streamIndex, &para->getParD(level)->sendProcessNeighborsAfterFtoCZ,
                                (unsigned int)(para->getNumberOfProcessNeighborsZ(level, "send")));
}

void exchangeCollDataZGPU27AllNodes(Parameter *para, vf::gpu::Communicator *comm, CudaMemoryManager *cudaManager,
...
@@ -324,52 +323,60 @@ void scatterNodesFromRecvBufferZGPU27AfterFtoC(Parameter *para, int level, int s
////////////////////////////////////////////////////////////////////////////////////////////////////
void exchangeCollDataZGPU27(Parameter *para, vf::gpu::Communicator *comm, CudaMemoryManager *cudaManager,
                            int level, int streamIndex,
                            std::vector<ProcessNeighbor27> *sendProcessNeighborDev,
                            std::vector<ProcessNeighbor27> *recvProcessNeighborDev,
                            std::vector<ProcessNeighbor27> *sendProcessNeighborHost,
                            std::vector<ProcessNeighbor27> *recvProcessNeighborHost)
{
    cudaStream_t stream =
        (streamIndex == -1) ? CU_STREAM_LEGACY : para->getStreamManager()->getStream(streamIndex);
    ///////////////////////////////////////////////////////////////////////////
    // copy Device to Host
    for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsZ(level, "send")); i++)
        cudaManager->cudaCopyProcessNeighborZFsDH(level, i, (*sendProcessNeighborDev)[i].memsizeFs, streamIndex);
    ///////////////////////////////////////////////////////////////////////////
    startNonBlockingMpiReceive((unsigned int)(*sendProcessNeighborHost).size(), comm, recvProcessNeighborHost);
    ///////////////////////////////////////////////////////////////////////////
    // wait for memcopy device to host to finish before sending data
    if (para->getUseStreams())
        cudaStreamSynchronize(stream);
    ///////////////////////////////////////////////////////////////////////////
    // edge nodes: copy received node values from x
    if (para->getUseStreams() && para->getNumberOfProcessNeighborsX(level, "recv") > 0 &&
        para->getParH(level)->sendProcessNeighborZ.size() != 0) {
        // check if in communication of all nodes (as opposed to reduced communication after fine to coarse)
        if (para->getParH(level)->sendProcessNeighborZ[0].numberOfNodes == (*sendProcessNeighborHost)[0].numberOfNodes) {
            copyEdgeNodes(para->getParH(level)->edgeNodesXtoZ, para->getParH(level)->recvProcessNeighborX,
                          *sendProcessNeighborHost);
        } else {
            copyEdgeNodes(para->getParH(level)->edgeNodesXtoZ, para->getParH(level)->recvProcessNeighborsAfterFtoCX,
                          *sendProcessNeighborHost);
        }
    }
    ///////////////////////////////////////////////////////////////////////////
    // edge nodes: copy received node values from y
    if (para->getUseStreams() && para->getNumberOfProcessNeighborsY(level, "recv") > 0 &&
        para->getParH(level)->sendProcessNeighborZ.size() != 0) {
        // check if in communication of all nodes (as opposed to reduced communication after fine to coarse)
        if (para->getParH(level)->sendProcessNeighborZ[0].numberOfNodes == (*sendProcessNeighborHost)[0].numberOfNodes) {
            copyEdgeNodes(para->getParH(level)->edgeNodesYtoZ, para->getParH(level)->recvProcessNeighborY,
                          *sendProcessNeighborHost);
        } else {
            copyEdgeNodes(para->getParH(level)->edgeNodesYtoZ, para->getParH(level)->recvProcessNeighborsAfterFtoCY,
                          *sendProcessNeighborHost);
        }
    }
    ///////////////////////////////////////////////////////////////////////////
    startBlockingMpiSend((unsigned int)(*sendProcessNeighborHost).size(), comm, sendProcessNeighborHost);
    ///////////////////////////////////////////////////////////////////////////
    // wait
    for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsZ(level, "send")); i++)
        comm->waitGPU(i);
    ///////////////////////////////////////////////////////////////////////////
    // reset the request array
    if (0 < (unsigned int)(para->getNumberOfProcessNeighborsZ(level, "send")))
        comm->resetRequest();
    ///////////////////////////////////////////////////////////////////////////
    // copy Host to Device
    for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsZ(level, "send")); i++) {
        cudaManager->cudaCopyProcessNeighborZFsHD(level, i, (*recvProcessNeighborDev)[i].memsizeFs, streamIndex);
...
src/gpu/VirtualFluids_GPU/Communication/ExchangeData27Test.cpp (+3 −4)

...
@@ -31,7 +31,7 @@ protected:
    int level = 0;
    int numNodes = 10;
    std::vector<real> recvFs;
    std::vector<real> sendFs;
    std::vector<ProcessNeighbor27> sendProcessNeighborHost;
    std::vector<ProcessNeighbor27> recvProcessNeighborHost;
...

@@ -47,15 +47,14 @@ protected:
        para->getParH(level)->edgeNodesXtoZ.emplace_back(0, 7, 0, 8);
    }

    void setUpRecvProcessNeighbors(int numberOfNodesInRecv)
    {
        recvFs.resize(numberOfNodesInRecv);
        std::fill(recvFs.begin(), recvFs.end(), 0.5); // 0.5s should not be copied
        for (LBMSimulationParameter::EdgeNodePositions edgeNode : para->getParH(level)->edgeNodesXtoZ) {
            if (edgeNode.indexInRecvBuffer > numberOfNodesInRecv) {
                continue;
            }
            recvFs[edgeNode.indexInRecvBuffer] = 0.1; // 0.1s should be copied
...
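The fixture above fills recvFs with a 0.5 sentinel and marks only the positions referenced by edge nodes with 0.1, so a later assertion can distinguish values that copyEdgeNodes moved from values it should have left untouched. A self-contained sketch of that sentinel technique, with plain asserts in place of the project's test framework and indices borrowed from the emplace_back(0, 7, 0, 8) call above:

#include <cassert>
#include <vector>

int main()
{
    const int numNodes = 10;
    std::vector<double> recvFs(numNodes, 0.5); // 0.5s should not be copied
    std::vector<double> sendFs(numNodes, 0.0);

    // Edge node mapping: recv index 7 feeds send index 8, as in the
    // fixture's edgeNodesXtoZ entry.
    const int indexInRecvBuffer = 7;
    const int indexInSendBuffer = 8;
    recvFs[indexInRecvBuffer] = 0.1; // 0.1s should be copied

    sendFs[indexInSendBuffer] = recvFs[indexInRecvBuffer];

    assert(sendFs[indexInSendBuffer] == 0.1); // marked value arrived
    assert(sendFs[0] == 0.0);                 // sentinel values did not
    return 0;
}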