diff --git a/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.cpp b/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.cpp index 4da3c674517c02abb34a393344d8144d3673e923..18aab89a72f966c945863657a6388a87143e84eb 100644 --- a/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.cpp +++ b/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.cpp @@ -76,8 +76,7 @@ void updateGrid27(Parameter *para, vf::gpu::Communicator *comm, CudaMemoryManage if (para->getUseStreams() && para->getNumprocs() > 1) { } else { if (para->getKernelNeedsFluidNodeIndicesToRun()) { - fineToCoarseUsingIndex(para, level, para->getParD(level)->fluidNodeIndices, - para->getParD(level)->numberOfFluidNodes, -1); + fineToCoarseUsingIndex(para, level, -1); prepareExchangeMultiGPU(para, level, -1); exchangeMultiGPU(para, comm, cudaManager, level, -1); @@ -1139,8 +1138,7 @@ void fineToCoarse(Parameter* para, int level) } -void fineToCoarseUsingIndex(Parameter *para, int level, uint *fluidNodeIndices, uint numberOfFluidNodes, - int streamIndex) +void fineToCoarseUsingIndex(Parameter *para, int level, int streamIndex) { cudaStream_t stream = (streamIndex == -1) ? CU_STREAM_LEGACY : para->getStreamManager()->getStream(streamIndex); ScaleFC_RhoSq_comp_27_Stream( @@ -1151,7 +1149,7 @@ void fineToCoarseUsingIndex(Parameter *para, int level, uint *fluidNodeIndices, para->getParD(level)->intFC.ICellFCC, para->getParD(level)->intFC.ICellFCF, para->getParD(level)->K_FC, para->getParD(level)->omega, para->getParD(level + 1)->omega, para->getParD(level)->vis, para->getParD(level)->nx, para->getParD(level)->ny, para->getParD(level + 1)->nx, para->getParD(level + 1)->ny, - para->getParD(level)->numberofthreads, para->getParD(level)->offFC, fluidNodeIndices, numberOfFluidNodes, stream); + para->getParD(level)->numberofthreads, para->getParD(level)->offFC, stream); getLastCudaError("ScaleFC27_RhoSq_comp_Stream execution failed"); ////////////////////////////////////////////////////////////////////////// diff --git a/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.h b/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.h index ef3cfab2deb13f4592f958f59b0484c56f24dffe..d80a3bf6ceac091722f0d2017a27f013d9702463 100644 --- a/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.h +++ b/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.h @@ -40,8 +40,7 @@ extern "C" void calcMacroscopicQuantities(Parameter* para, int level); extern "C" void preCollisionBC(Parameter* para, CudaMemoryManager* cudaManager, int level, unsigned int t); extern "C" void fineToCoarse(Parameter* para, int level); -extern "C" void fineToCoarseUsingIndex(Parameter *para, int level, uint *fluidNodeIndices = nullptr, - uint numberOfFluidNodes = 0, int stream = -1); +extern "C" void fineToCoarseUsingIndex(Parameter *para, int level, int stream = -1); extern "C" void coarseToFine(Parameter* para, int level); extern "C" void coarseToFineUsingIndex(Parameter *para, int level, uint *fluidNodeIndices = nullptr, diff --git a/src/gpu/VirtualFluids_GPU/GPU/GPU_Interface.h b/src/gpu/VirtualFluids_GPU/GPU/GPU_Interface.h index 1220b5ef4010bdba6d6372eea25d9b8aec2ebcfb..e23644514bc87ae74fa825be61df0a00332a8621 100644 --- a/src/gpu/VirtualFluids_GPU/GPU/GPU_Interface.h +++ b/src/gpu/VirtualFluids_GPU/GPU/GPU_Interface.h @@ -2116,9 +2116,7 @@ extern "C" void ScaleFC_RhoSq_comp_27_Stream( real* DC, unsigned int nxF, unsigned int nyF, unsigned int numberOfThreads, - OffFC offFC, - unsigned int *fluidNodeIndices, - unsigned int numberOfFluidNodes, + OffFC offFC, CUstream_st *stream); extern "C" void ScaleFC_RhoSq_3rdMom_comp_27( real* DC, diff --git a/src/gpu/VirtualFluids_GPU/GPU/GPU_Kernels.cuh b/src/gpu/VirtualFluids_GPU/GPU/GPU_Kernels.cuh index fc35b1c661515f08c231be28f7041cc91a849373..288db43e7bcd36dc4d187982b86178d345601094 100644 --- a/src/gpu/VirtualFluids_GPU/GPU/GPU_Kernels.cuh +++ b/src/gpu/VirtualFluids_GPU/GPU/GPU_Kernels.cuh @@ -2049,14 +2049,6 @@ extern "C" __global__ void scaleFC_RhoSq_comp_27( real* DC, unsigned int nyF, OffFC offFC); -extern "C" __global__ void -scaleFC_RhoSq_comp_27_Stream(real *DC, real *DF, unsigned int *neighborCX, unsigned int *neighborCY, - unsigned int *neighborCZ, unsigned int *neighborFX, unsigned int *neighborFY, - unsigned int *neighborFZ, unsigned int size_MatC, unsigned int size_MatF, bool evenOrOdd, - unsigned int *posC, unsigned int *posFSWB, unsigned int kFC, real omCoarse, real omFine, - real nu, unsigned int nxC, unsigned int nyC, unsigned int nxF, unsigned int nyF, - OffFC offFC, const unsigned int *fluidNodeIndices, unsigned int numberOfFluidNodes); - extern "C" __global__ void scaleFC_RhoSq_3rdMom_comp_27(real* DC, real* DF, unsigned int* neighborCX, diff --git a/src/gpu/VirtualFluids_GPU/GPU/LBMKernel.cu b/src/gpu/VirtualFluids_GPU/GPU/LBMKernel.cu index feab617a2172235f68cf5c04b3aea638d0cd7ef4..3885c24dc4476443d50af61cf293ac3cc8d2e12e 100644 --- a/src/gpu/VirtualFluids_GPU/GPU/LBMKernel.cu +++ b/src/gpu/VirtualFluids_GPU/GPU/LBMKernel.cu @@ -6265,25 +6265,46 @@ extern "C" void ScaleFC_RhoSq_comp_27_Stream(real * DC, unsigned int nyF, unsigned int numberOfThreads, OffFC offFC, - unsigned int *fluidNodeIndices, - unsigned int numberOfFluidNodes, CUstream_st* stream) { - int Grid = (kFC / numberOfThreads) + 1; - int Grid1, Grid2; - if (Grid > 512) { - Grid1 = 512; - Grid2 = (Grid / Grid1) + 1; - } else { - Grid1 = 1; - Grid2 = Grid; - } - dim3 gridINT_FC(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1); - - scaleFC_RhoSq_comp_27_Stream<<<gridINT_FC, threads, 0 , stream>>>(DC, DF, neighborCX, neighborCY, neighborCZ, neighborFX, neighborFY, neighborFZ, size_MatC, size_MatF, evenOrOdd, - posC, posFSWB, kFC, omCoarse, omFine, nu, nxC, nyC, nxF, nyF, offFC, fluidNodeIndices, numberOfFluidNodes); - getLastCudaError("scaleFC_RhoSq_comp_27_Stream execution failed"); + int Grid = (kFC / numberOfThreads)+1; + int Grid1, Grid2; + if (Grid>512) + { + Grid1 = 512; + Grid2 = (Grid/Grid1)+1; + } + else + { + Grid1 = 1; + Grid2 = Grid; + } + dim3 gridINT_FC(Grid1, Grid2); + dim3 threads(numberOfThreads, 1, 1 ); + + scaleFC_RhoSq_comp_27<<< gridINT_FC, threads, 0, stream >>>(DC, + DF, + neighborCX, + neighborCY, + neighborCZ, + neighborFX, + neighborFY, + neighborFZ, + size_MatC, + size_MatF, + evenOrOdd, + posC, + posFSWB, + kFC, + omCoarse, + omFine, + nu, + nxC, + nyC, + nxF, + nyF, + offFC); + getLastCudaError("scaleFC_RhoSq_27 execution failed"); } ////////////////////////////////////////////////////////////////////////// extern "C" void ScaleFC_RhoSq_3rdMom_comp_27( real* DC, diff --git a/src/gpu/VirtualFluids_GPU/GPU/ScaleFC27.cu b/src/gpu/VirtualFluids_GPU/GPU/ScaleFC27.cu index a56c75e5859d8a722e148d07643763fa31086718..f61a6f980bd8684665c32d06f53efcb7f9dc0070 100644 --- a/src/gpu/VirtualFluids_GPU/GPU/ScaleFC27.cu +++ b/src/gpu/VirtualFluids_GPU/GPU/ScaleFC27.cu @@ -11094,31 +11094,6 @@ extern "C" __global__ void scaleFC_RhoSq_comp_27(real* DC, nyC, nxF, nyF, offFC, k); } -extern "C" __global__ void scaleFC_RhoSq_comp_27_Stream(real *DC, real *DF, unsigned int *neighborCX, unsigned int *neighborCY, - unsigned int *neighborCZ, unsigned int *neighborFX, - unsigned int *neighborFY, unsigned int *neighborFZ, - unsigned int size_MatC, unsigned int size_MatF, bool evenOrOdd, - unsigned int *posC, unsigned int *posFSWB, unsigned int kFC, - real omCoarse, real omFine, real nu, unsigned int nxC, unsigned int nyC, unsigned int nxF, unsigned int nyF, - OffFC offFC, const unsigned int *fluidNodeIndices, unsigned int numberOfFluidNodes) -{ - //////////////////////////////////////////////////////////////////////////////// - const unsigned ix = threadIdx.x; // Globaler x-Index - const unsigned iy = blockIdx.x; // Globaler y-Index - const unsigned iz = blockIdx.y; // Globaler z-Index - - const unsigned nx = blockDim.x; - const unsigned ny = gridDim.x; - - const unsigned k_thread = nx * (ny * iz + iy) + ix; - - if (k_thread < numberOfFluidNodes) { - const unsigned k = fluidNodeIndices[k_thread]; - scaleFC_RhoSq_comp_27_Calculation(DC, DF, neighborCX, neighborCY, neighborCZ, neighborFX, neighborFY, - neighborFZ, size_MatC, size_MatF, evenOrOdd, posC, posFSWB, kFC, omCoarse, - omFine, nu, nxC, nyC, nxF, nyF, offFC, k); - } -}