diff --git a/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.cpp b/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.cpp index d28f7f4e4175d6a9f75ddc8edac8410a08560068..5726f1e6e52597154de29958246b0d715a7f5d08 100644 --- a/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.cpp +++ b/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.cpp @@ -77,8 +77,8 @@ void updateGrid27(Parameter *para, vf::gpu::Communicator *comm, CudaMemoryManage } else { //fineToCoarse(para, level); - fineToCoarseWithStream(para, level, para->getParD(level)->intFCBorder.ICellFCC, - para->getParD(level)->intFCBorder.kFC, -1); + //fineToCoarseWithStream(para, level, para->getParD(level)->intFCBorder.ICellFCC, + //para->getParD(level)->intFCBorder.kFC, -1); fineToCoarseWithStream(para, level, para->getParD(level)->intFCBulk.ICellFCC, para->getParD(level)->intFCBulk.kFC, -1); @@ -988,7 +988,7 @@ void fineToCoarse(Parameter* para, int level) para->getParD(level)->K_FC, para->getParD(level)->omega, para->getParD(level+1)->omega, para->getParD(level)->vis, para->getParD(level)->nx, para->getParD(level)->ny, para->getParD(level+1)->nx, para->getParD(level+1)->ny, para->getParD(level)->numberofthreads, - para->getParD(level)->offFC); + para->getParD(level)->offFC, CU_STREAM_LEGACY); getLastCudaError("ScaleFC27_RhoSq_comp execution failed"); //ScaleFC_AA2016_comp_27( para->getParD(level)->d0SP.f[0], para->getParD(level+1)->d0SP.f[0], @@ -1136,17 +1136,17 @@ void fineToCoarse(Parameter* para, int level) void fineToCoarseWithStream(Parameter *para, int level, uint *iCellFCC, uint k_FC, int streamIndex) { cudaStream_t stream = (streamIndex == -1) ? CU_STREAM_LEGACY : para->getStreamManager()->getStream(streamIndex); + + ScaleFC_RhoSq_comp_27( para->getParD(level)->d0SP.f[0], para->getParD(level+1)->d0SP.f[0], + para->getParD(level)->neighborX_SP, para->getParD(level)->neighborY_SP, para->getParD(level)->neighborZ_SP, + para->getParD(level+1)->neighborX_SP, para->getParD(level+1)->neighborY_SP, para->getParD(level+1)->neighborZ_SP, + para->getParD(level)->size_Mat_SP, para->getParD(level+1)->size_Mat_SP, para->getParD(level)->evenOrOdd, + iCellFCC, para->getParD(level)->intFC.ICellFCF, + k_FC, para->getParD(level)->omega, para->getParD(level + 1)->omega, + para->getParD(level)->vis, para->getParD(level)->nx, para->getParD(level)->ny, + para->getParD(level+1)->nx, para->getParD(level+1)->ny, para->getParD(level)->numberofthreads, + para->getParD(level)->offFC, stream); getLastCudaError("ScaleFC27_RhoSq_comp execution failed"); - ScaleFC_RhoSq_comp_27_Stream( para->getParD(level)->d0SP.f[0], para->getParD(level+1)->d0SP.f[0], - para->getParD(level)->neighborX_SP, para->getParD(level)->neighborY_SP, para->getParD(level)->neighborZ_SP, - para->getParD(level+1)->neighborX_SP, para->getParD(level+1)->neighborY_SP, para->getParD(level+1)->neighborZ_SP, - para->getParD(level)->size_Mat_SP, para->getParD(level+1)->size_Mat_SP, para->getParD(level)->evenOrOdd, - iCellFCC, para->getParD(level)->intFC.ICellFCF, - k_FC, para->getParD(level)->omega, para->getParD(level + 1)->omega, - para->getParD(level)->vis, para->getParD(level)->nx, para->getParD(level)->ny, - para->getParD(level+1)->nx, para->getParD(level+1)->ny, para->getParD(level)->numberofthreads, - para->getParD(level)->offFC, stream); - getLastCudaError("ScaleFC27_RhoSq_comp_Stream execution failed"); ////////////////////////////////////////////////////////////////////////// // A D V E C T I O N D I F F U S I O N diff --git a/src/gpu/VirtualFluids_GPU/GPU/GPU_Interface.h b/src/gpu/VirtualFluids_GPU/GPU/GPU_Interface.h index e23644514bc87ae74fa825be61df0a00332a8621..5b7a32481bd076c70eb62f7a7fdfe8afa32936a7 100644 --- a/src/gpu/VirtualFluids_GPU/GPU/GPU_Interface.h +++ b/src/gpu/VirtualFluids_GPU/GPU/GPU_Interface.h @@ -2091,33 +2091,9 @@ extern "C" void ScaleFC_RhoSq_comp_27( real* DC, unsigned int nyC, unsigned int nxF, unsigned int nyF, - unsigned int numberOfThreads, - OffFC offFC); - -extern "C" void ScaleFC_RhoSq_comp_27_Stream( real* DC, - real* DF, - unsigned int* neighborCX, - unsigned int* neighborCY, - unsigned int* neighborCZ, - unsigned int* neighborFX, - unsigned int* neighborFY, - unsigned int* neighborFZ, - unsigned int size_MatC, - unsigned int size_MatF, - bool evenOrOdd, - unsigned int* posC, - unsigned int* posFSWB, - unsigned int kFC, - real omCoarse, - real omFine, - real nu, - unsigned int nxC, - unsigned int nyC, - unsigned int nxF, - unsigned int nyF, - unsigned int numberOfThreads, - OffFC offFC, - CUstream_st *stream); + unsigned int numberOfThreads, + OffFC offFC, + CUstream_st *stream); extern "C" void ScaleFC_RhoSq_3rdMom_comp_27( real* DC, real* DF, diff --git a/src/gpu/VirtualFluids_GPU/GPU/LBMKernel.cu b/src/gpu/VirtualFluids_GPU/GPU/LBMKernel.cu index 3885c24dc4476443d50af61cf293ac3cc8d2e12e..6e072c6dc490c7671aa81bcfe14df7d27683cd16 100644 --- a/src/gpu/VirtualFluids_GPU/GPU/LBMKernel.cu +++ b/src/gpu/VirtualFluids_GPU/GPU/LBMKernel.cu @@ -6201,7 +6201,8 @@ extern "C" void ScaleFC_RhoSq_comp_27(real* DC, unsigned int nxF, unsigned int nyF, unsigned int numberOfThreads, - OffFC offFC) + OffFC offFC, + CUstream_st *stream) { int Grid = (kFC / numberOfThreads)+1; int Grid1, Grid2; @@ -6218,7 +6219,8 @@ extern "C" void ScaleFC_RhoSq_comp_27(real* DC, dim3 gridINT_FC(Grid1, Grid2); dim3 threads(numberOfThreads, 1, 1 ); - scaleFC_RhoSq_comp_27<<< gridINT_FC, threads >>>(DC, + scaleFC_RhoSq_comp_27<<<gridINT_FC, threads, 0, stream>>>( + DC, DF, neighborCX, neighborCY, @@ -6242,70 +6244,7 @@ extern "C" void ScaleFC_RhoSq_comp_27(real* DC, offFC); getLastCudaError("scaleFC_RhoSq_27 execution failed"); } -extern "C" void ScaleFC_RhoSq_comp_27_Stream(real * DC, - real * DF, - unsigned int * neighborCX, - unsigned int * neighborCY, - unsigned int * neighborCZ, - unsigned int * neighborFX, - unsigned int * neighborFY, - unsigned int * neighborFZ, - unsigned int size_MatC, - unsigned int size_MatF, - bool evenOrOdd, - unsigned int * posC, - unsigned int * posFSWB, - unsigned int kFC, - real omCoarse, - real omFine, - real nu, - unsigned int nxC, - unsigned int nyC, - unsigned int nxF, - unsigned int nyF, - unsigned int numberOfThreads, - OffFC offFC, - CUstream_st* stream) -{ - int Grid = (kFC / numberOfThreads)+1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid/Grid1)+1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 gridINT_FC(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1 ); - scaleFC_RhoSq_comp_27<<< gridINT_FC, threads, 0, stream >>>(DC, - DF, - neighborCX, - neighborCY, - neighborCZ, - neighborFX, - neighborFY, - neighborFZ, - size_MatC, - size_MatF, - evenOrOdd, - posC, - posFSWB, - kFC, - omCoarse, - omFine, - nu, - nxC, - nyC, - nxF, - nyF, - offFC); - getLastCudaError("scaleFC_RhoSq_27 execution failed"); -} ////////////////////////////////////////////////////////////////////////// extern "C" void ScaleFC_RhoSq_3rdMom_comp_27( real* DC, real* DF,