diff --git a/src/gpu/core/Calculation/Calculation.h b/src/gpu/core/Calculation/Calculation.h index 2921f13b718a75326348f898a5578cf277a793a0..09acd70739d1103424ba0c190a904cac7e26f56f 100644 --- a/src/gpu/core/Calculation/Calculation.h +++ b/src/gpu/core/Calculation/Calculation.h @@ -111,12 +111,6 @@ struct ICellNeigh using InterpolationCellNeighbor = ICellNeigh; -// ADD IN FUTURE RELEASE -struct Distributions6 -{ - real* g[6]; -}; - // ADD IN FUTURE RELEASE struct Distributions7 { @@ -243,18 +237,6 @@ struct ProcessNeighbor27 int numberOfFs; }; -// ADD IN FUTURE RELEASE -struct ProcessNeighborF3 -{ - real* g[6]; - uint memsizeGs; - int* index; - uint memsizeIndex; - uint rankNeighbor; - int numberOfNodes; - int numberOfGs; -}; - ////////////////////////////////////////////////////////////////////////// // DEPRECATED inline int vectorPosition(int i, int j, int k, int Lx, int Ly) diff --git a/src/gpu/core/Communication/ExchangeData27.cpp b/src/gpu/core/Communication/ExchangeData27.cpp index 2c7b1cc6bc5148b9f1f77fba156757f9fdc29e81..00f669a449bbd290acffae095e286d84594c7a02 100644 --- a/src/gpu/core/Communication/ExchangeData27.cpp +++ b/src/gpu/core/Communication/ExchangeData27.cpp @@ -1285,276 +1285,4 @@ void exchangePostCollDataADZGPU27(Parameter* para, vf::parallel::Communicator& c } /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// } -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//3D domain decomposition F3 - K18/K20 -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -// X -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -void exchangeCollDataF3XGPU(Parameter* para, vf::parallel::Communicator& comm, CudaMemoryManager* cudaMemoryManager, int level) -{ - /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - //copy Device to Host - for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsX(level, "send")); i++) - { - getSendGsDevF3( - para->getParD(level)->g6.g[0], - para->getParD(level)->sendProcessNeighborF3X[i].g[0], - para->getParD(level)->sendProcessNeighborF3X[i].index, - para->getParD(level)->sendProcessNeighborF3X[i].numberOfNodes, - para->getParD(level)->neighborX, - para->getParD(level)->neighborY, - para->getParD(level)->neighborZ, - para->getParD(level)->numberOfNodes, - para->getParD(level)->isEvenTimestep, - para->getParD(level)->numberofthreads); - ////////////////////////////////////////////////////////////////////////// - cudaMemoryManager->cudaCopyProcessNeighborF3XFsDH(level, i); - } - ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - //start non blocking MPI receive - for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsX(level, "send")); i++) - { - comm.receiveNonBlocking( - para->getParH(level)->recvProcessNeighborF3X[i].g[0], - para->getParH(level)->recvProcessNeighborF3X[i].numberOfGs, - para->getParH(level)->recvProcessNeighborF3X[i].rankNeighbor); - } - ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - //start blocking MPI send - for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsX(level, "send")); i++) - { - comm.send( - para->getParH(level)->sendProcessNeighborF3X[i].g[0], - para->getParH(level)->sendProcessNeighborF3X[i].numberOfGs, - para->getParH(level)->sendProcessNeighborF3X[i].rankNeighbor); - } - /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - //Wait - comm.waitAll(); - /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - //reset the request array - if (0 < (unsigned int)(para->getNumberOfProcessNeighborsX(level, "send"))) - { - comm.resetRequests(); - } - /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - //copy Host to Device - for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsX(level, "send")); i++) - { - cudaMemoryManager->cudaCopyProcessNeighborF3XFsHD(level, i); - ////////////////////////////////////////////////////////////////////////// - setRecvGsDevF3( - para->getParD(level)->g6.g[0], - para->getParD(level)->recvProcessNeighborF3X[i].g[0], - para->getParD(level)->recvProcessNeighborF3X[i].index, - para->getParD(level)->recvProcessNeighborF3X[i].numberOfNodes, - para->getParD(level)->neighborX, - para->getParD(level)->neighborY, - para->getParD(level)->neighborZ, - para->getParD(level)->numberOfNodes, - para->getParD(level)->isEvenTimestep, - para->getParD(level)->numberofthreads); - } - /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -} -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - - - - -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -// Y -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -void exchangeCollDataF3YGPU(Parameter* para, vf::parallel::Communicator& comm, CudaMemoryManager* cudaMemoryManager, int level) -{ - /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - //copy Device to Host - for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsY(level, "send")); i++) - { - getSendGsDevF3( - para->getParD(level)->g6.g[0], - para->getParD(level)->sendProcessNeighborF3Y[i].g[0], - para->getParD(level)->sendProcessNeighborF3Y[i].index, - para->getParD(level)->sendProcessNeighborF3Y[i].numberOfNodes, - para->getParD(level)->neighborX, - para->getParD(level)->neighborY, - para->getParD(level)->neighborZ, - para->getParD(level)->numberOfNodes, - para->getParD(level)->isEvenTimestep, - para->getParD(level)->numberofthreads); - ////////////////////////////////////////////////////////////////////////// - cudaMemoryManager->cudaCopyProcessNeighborF3YFsDH(level, i); - } - /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - //start non blocking MPI receive - for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsY(level, "send")); i++) - { - comm.receiveNonBlocking( - para->getParH(level)->recvProcessNeighborF3Y[i].g[0], - para->getParH(level)->recvProcessNeighborF3Y[i].numberOfGs, - para->getParH(level)->recvProcessNeighborF3Y[i].rankNeighbor); - } - ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - //start blocking MPI send - for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsY(level, "send")); i++) - { - comm.send( - para->getParH(level)->sendProcessNeighborF3Y[i].g[0], - para->getParH(level)->sendProcessNeighborF3Y[i].numberOfGs, - para->getParH(level)->sendProcessNeighborF3Y[i].rankNeighbor); - } - /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - //Wait - comm.waitAll(); - /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - //reset the request array - if (0 < (unsigned int)(para->getNumberOfProcessNeighborsY(level, "send"))) - { - comm.resetRequests(); - } - /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - //copy Host to Device - for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsY(level, "send")); i++) - { - cudaMemoryManager->cudaCopyProcessNeighborF3YFsHD(level, i); - ////////////////////////////////////////////////////////////////////////// - setRecvGsDevF3( - para->getParD(level)->g6.g[0], - para->getParD(level)->recvProcessNeighborF3Y[i].g[0], - para->getParD(level)->recvProcessNeighborF3Y[i].index, - para->getParD(level)->recvProcessNeighborF3Y[i].numberOfNodes, - para->getParD(level)->neighborX, - para->getParD(level)->neighborY, - para->getParD(level)->neighborZ, - para->getParD(level)->numberOfNodes, - para->getParD(level)->isEvenTimestep, - para->getParD(level)->numberofthreads); - } - /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -} -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - - - - -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -// Z -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -void exchangeCollDataF3ZGPU(Parameter* para, vf::parallel::Communicator& comm, CudaMemoryManager* cudaMemoryManager, int level) -{ - /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - //copy Device to Host - for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsZ(level, "send")); i++) - { - getSendGsDevF3( - para->getParD(level)->g6.g[0], - para->getParD(level)->sendProcessNeighborF3Z[i].g[0], - para->getParD(level)->sendProcessNeighborF3Z[i].index, - para->getParD(level)->sendProcessNeighborF3Z[i].numberOfNodes, - para->getParD(level)->neighborX, - para->getParD(level)->neighborY, - para->getParD(level)->neighborZ, - para->getParD(level)->numberOfNodes, - para->getParD(level)->isEvenTimestep, - para->getParD(level)->numberofthreads); - ////////////////////////////////////////////////////////////////////////// - cudaMemoryManager->cudaCopyProcessNeighborF3ZFsDH(level, i); - } - /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - //start non blocking MPI receive - for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsZ(level, "send")); i++) - { - comm.receiveNonBlocking( - para->getParH(level)->recvProcessNeighborF3Z[i].g[0], - para->getParH(level)->recvProcessNeighborF3Z[i].numberOfGs, - para->getParH(level)->recvProcessNeighborF3Z[i].rankNeighbor); - } - ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - //start blocking MPI send - for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsZ(level, "send")); i++) - { - comm.send( - para->getParH(level)->sendProcessNeighborF3Z[i].g[0], - para->getParH(level)->sendProcessNeighborF3Z[i].numberOfGs, - para->getParH(level)->sendProcessNeighborF3Z[i].rankNeighbor); - } - /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - //Wait - comm.waitAll(); - /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - //reset the request array - if (0 < (unsigned int)(para->getNumberOfProcessNeighborsZ(level, "send"))) - { - comm.resetRequests(); - } - /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - //copy Host to Device - for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsZ(level, "send")); i++) - { - cudaMemoryManager->cudaCopyProcessNeighborF3ZFsHD(level, i); - ////////////////////////////////////////////////////////////////////////// - setRecvGsDevF3( - para->getParD(level)->g6.g[0], - para->getParD(level)->recvProcessNeighborF3Z[i].g[0], - para->getParD(level)->recvProcessNeighborF3Z[i].index, - para->getParD(level)->recvProcessNeighborF3Z[i].numberOfNodes, - para->getParD(level)->neighborX, - para->getParD(level)->neighborY, - para->getParD(level)->neighborZ, - para->getParD(level)->numberOfNodes, - para->getParD(level)->isEvenTimestep, - para->getParD(level)->numberofthreads); - } - /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -} -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// \ No newline at end of file diff --git a/src/gpu/core/Communication/ExchangeData27_Device.cu b/src/gpu/core/Communication/ExchangeData27_Device.cu index e1a524a85314d30c3717b58e995df8675feae984..ccfa47ba7d9500fec70d875b06b49e78da4901ea 100644 --- a/src/gpu/core/Communication/ExchangeData27_Device.cu +++ b/src/gpu/core/Communication/ExchangeData27_Device.cu @@ -811,149 +811,6 @@ __global__ void setRecvFsPre27(real* DD, } } -__global__ void getSendGsF3( - real* G6, - real* bufferGs, - int* sendIndex, - int buffmax, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - unsigned long long numberOfLBnodes, - bool isEvenTimestep) -{ - //////////////////////////////////////////////////////////////////////////////// - const unsigned x = threadIdx.x; // Globaler x-Index - const unsigned y = blockIdx.x; // Globaler y-Index - const unsigned z = blockIdx.y; // Globaler z-Index - - const unsigned nx = blockDim.x; - const unsigned ny = gridDim.x; - - const unsigned k = nx*(ny*z + y) + x; - ////////////////////////////////////////////////////////////////////////// - - if (k < buffmax) - { - //////////////////////////////////////////////////////////////////////////////// - //set index - unsigned int kIndex = sendIndex[k]; - unsigned int kr = kIndex; - unsigned int kw = neighborX[kIndex]; - unsigned int ks = neighborY[kIndex]; - unsigned int kb = neighborZ[kIndex]; - //////////////////////////////////////////////////////////////////////////////// - //set Pointer for Gs - Distributions6 G; - if (isEvenTimestep) - { - G.g[dP00] = &G6[dP00 * numberOfLBnodes]; - G.g[dM00] = &G6[dM00 * numberOfLBnodes]; - G.g[d0P0] = &G6[d0P0 * numberOfLBnodes]; - G.g[d0M0] = &G6[d0M0 * numberOfLBnodes]; - G.g[d00P] = &G6[d00P * numberOfLBnodes]; - G.g[d00M] = &G6[d00M * numberOfLBnodes]; - } - else - { - G.g[dM00] = &G6[dP00 * numberOfLBnodes]; - G.g[dP00] = &G6[dM00 * numberOfLBnodes]; - G.g[d0M0] = &G6[d0P0 * numberOfLBnodes]; - G.g[d0P0] = &G6[d0M0 * numberOfLBnodes]; - G.g[d00M] = &G6[d00P * numberOfLBnodes]; - G.g[d00P] = &G6[d00M * numberOfLBnodes]; - } - ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - //set Pointer for Buffer Gs - Distributions6 Dbuff; - Dbuff.g[dP00] = &bufferGs[dP00 * buffmax]; - Dbuff.g[dM00] = &bufferGs[dM00 * buffmax]; - Dbuff.g[d0P0] = &bufferGs[d0P0 * buffmax]; - Dbuff.g[d0M0] = &bufferGs[d0M0 * buffmax]; - Dbuff.g[d00P] = &bufferGs[d00P * buffmax]; - Dbuff.g[d00M] = &bufferGs[d00M * buffmax]; - ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - //write Gs to buffer - (Dbuff.g[dP00])[k] = (G.g[dM00])[kw]; - (Dbuff.g[dM00])[k] = (G.g[dP00])[kr]; - (Dbuff.g[d0P0])[k] = (G.g[d0M0])[ks]; - (Dbuff.g[d0M0])[k] = (G.g[d0P0])[kr]; - (Dbuff.g[d00P])[k] = (G.g[d00M])[kb]; - (Dbuff.g[d00M])[k] = (G.g[d00P])[kr]; - } -} - -__global__ void setRecvGsF3( - real* G6, - real* bufferGs, - int* recvIndex, - int buffmax, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - unsigned long long numberOfLBnodes, - bool isEvenTimestep) -{ - //////////////////////////////////////////////////////////////////////////////// - const unsigned x = threadIdx.x; // Globaler x-Index - const unsigned y = blockIdx.x; // Globaler y-Index - const unsigned z = blockIdx.y; // Globaler z-Index - - const unsigned nx = blockDim.x; - const unsigned ny = gridDim.x; - - const unsigned k = nx*(ny*z + y) + x; - ////////////////////////////////////////////////////////////////////////// - - if (k < buffmax) - { - //////////////////////////////////////////////////////////////////////////////// - //set index - unsigned int kIndex = recvIndex[k]; - unsigned int kr = kIndex; - unsigned int kw = neighborX[kIndex]; - unsigned int ks = neighborY[kIndex]; - unsigned int kb = neighborZ[kIndex]; - //////////////////////////////////////////////////////////////////////////////// - //set Pointer for Gs - Distributions6 G; - if (isEvenTimestep) - { - G.g[dP00] = &G6[dP00 * numberOfLBnodes]; - G.g[dM00] = &G6[dM00 * numberOfLBnodes]; - G.g[d0P0] = &G6[d0P0 * numberOfLBnodes]; - G.g[d0M0] = &G6[d0M0 * numberOfLBnodes]; - G.g[d00P] = &G6[d00P * numberOfLBnodes]; - G.g[d00M] = &G6[d00M * numberOfLBnodes]; - } - else - { - G.g[dM00] = &G6[dP00 * numberOfLBnodes]; - G.g[dP00] = &G6[dM00 * numberOfLBnodes]; - G.g[d0M0] = &G6[d0P0 * numberOfLBnodes]; - G.g[d0P0] = &G6[d0M0 * numberOfLBnodes]; - G.g[d00M] = &G6[d00P * numberOfLBnodes]; - G.g[d00P] = &G6[d00M * numberOfLBnodes]; - } - ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - //set Pointer for Buffer Gs - Distributions6 Dbuff; - Dbuff.g[dP00] = &bufferGs[dP00 * buffmax]; - Dbuff.g[dM00] = &bufferGs[dM00 * buffmax]; - Dbuff.g[d0P0] = &bufferGs[d0P0 * buffmax]; - Dbuff.g[d0M0] = &bufferGs[d0M0 * buffmax]; - Dbuff.g[d00P] = &bufferGs[d00P * buffmax]; - Dbuff.g[d00M] = &bufferGs[d00M * buffmax]; - ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - //write buffer to Gs - (G.g[dM00])[kw] = (Dbuff.g[dP00])[k]; - (G.g[dP00])[kr] = (Dbuff.g[dM00])[k]; - (G.g[d0M0])[ks] = (Dbuff.g[d0P0])[k]; - (G.g[d0P0])[kr] = (Dbuff.g[d0M0])[k]; - (G.g[d00M])[kb] = (Dbuff.g[d00P])[k]; - (G.g[d00P])[kr] = (Dbuff.g[d00M])[k]; - } -} @@ -1069,56 +926,3 @@ void SetRecvFsPostDev27( getLastCudaError("setRecvFsPost27 execution failed"); } -void getSendGsDevF3( - real* G6, - real* bufferGs, - int* sendIndex, - int buffmax, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - unsigned long long numberOfLBnodes, - bool isEvenTimestep, - unsigned int numberOfThreads) -{ - vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, buffmax); - - getSendGsF3 <<< grid.grid, grid.threads >>> ( - G6, - bufferGs, - sendIndex, - buffmax, - neighborX, - neighborY, - neighborZ, - numberOfLBnodes, - isEvenTimestep); - getLastCudaError("getSendGsF3 execution failed"); -} - -void setRecvGsDevF3( - real* G6, - real* bufferGs, - int* recvIndex, - int buffmax, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - unsigned long long numberOfLBnodes, - bool isEvenTimestep, - unsigned int numberOfThreads) -{ - vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, buffmax); - - setRecvGsF3 <<< grid.grid, grid.threads >>> ( - G6, - bufferGs, - recvIndex, - buffmax, - neighborX, - neighborY, - neighborZ, - numberOfLBnodes, - isEvenTimestep); - getLastCudaError("setRecvGsF3 execution failed"); -} diff --git a/src/gpu/core/Cuda/CudaMemoryManager.cpp b/src/gpu/core/Cuda/CudaMemoryManager.cpp index b99d4727b90b3f9b5a089fbc1862205b73b1c38e..e6fe037d5eebd1b81333f6a85028e4f7c1768567 100644 --- a/src/gpu/core/Cuda/CudaMemoryManager.cpp +++ b/src/gpu/core/Cuda/CudaMemoryManager.cpp @@ -192,14 +192,6 @@ void CudaMemoryManager::cudaFreeSP(int lev) checkCudaErrors( cudaFreeHost(parameter->getParH(lev)->neighborY )); checkCudaErrors( cudaFreeHost(parameter->getParH(lev)->neighborZ )); } -void CudaMemoryManager::cudaAllocF3SP(int lev) -{ - //Device - checkCudaErrors(cudaMalloc((void**) &(parameter->getParD(lev)->g6.g[0]), (unsigned long long)6*(unsigned long long)parameter->getParD(lev)->memSizeRealLBnodes)); - ////////////////////////////////////////////////////////////////////////// - double tmp = (double)6 * (double)parameter->getParH(lev)->memSizeRealLBnodes; - setMemsizeGPU(tmp, false); -} @@ -723,187 +715,8 @@ void CudaMemoryManager::cudaFreeProcessNeighborZ(int lev, unsigned int processNe checkCudaErrors( cudaFreeHost(parameter->getParH(lev)->recvProcessNeighborZ[processNeighbor].index )); checkCudaErrors( cudaFreeHost(parameter->getParH(lev)->recvProcessNeighborZ[processNeighbor].f[0] )); } -////////////////////////////////////////////////////////////////////////// -//Process Neighbors -// 3D domain decomposition F3 -// X ///////////////////////////////////////////////////////////////////////////// -void CudaMemoryManager::cudaAllocProcessNeighborF3X(int lev, unsigned int processNeighbor) -{ - //Host - checkCudaErrors(cudaMallocHost((void**) &(parameter->getParH(lev)->sendProcessNeighborF3X[processNeighbor].index), parameter->getParH(lev)->sendProcessNeighborF3X[processNeighbor].memsizeIndex)); - checkCudaErrors(cudaMallocHost((void**) &(parameter->getParH(lev)->sendProcessNeighborF3X[processNeighbor].g[0]), parameter->getParH(lev)->sendProcessNeighborF3X[processNeighbor].memsizeGs)); - checkCudaErrors(cudaMallocHost((void**) &(parameter->getParH(lev)->recvProcessNeighborF3X[processNeighbor].index), parameter->getParH(lev)->recvProcessNeighborF3X[processNeighbor].memsizeIndex)); - checkCudaErrors(cudaMallocHost((void**) &(parameter->getParH(lev)->recvProcessNeighborF3X[processNeighbor].g[0]), parameter->getParH(lev)->recvProcessNeighborF3X[processNeighbor].memsizeGs)); - - //Device - checkCudaErrors(cudaMalloc((void**) &(parameter->getParD(lev)->sendProcessNeighborF3X[processNeighbor].index), parameter->getParD(lev)->sendProcessNeighborF3X[processNeighbor].memsizeIndex)); - checkCudaErrors(cudaMalloc((void**) &(parameter->getParD(lev)->sendProcessNeighborF3X[processNeighbor].g[0]), parameter->getParD(lev)->sendProcessNeighborF3X[processNeighbor].memsizeGs)); - checkCudaErrors(cudaMalloc((void**) &(parameter->getParD(lev)->recvProcessNeighborF3X[processNeighbor].index), parameter->getParD(lev)->recvProcessNeighborF3X[processNeighbor].memsizeIndex)); - checkCudaErrors(cudaMalloc((void**) &(parameter->getParD(lev)->recvProcessNeighborF3X[processNeighbor].g[0]), parameter->getParD(lev)->recvProcessNeighborF3X[processNeighbor].memsizeGs)); - - //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - double tmp = - (double)parameter->getParH(lev)->sendProcessNeighborF3X[processNeighbor].memsizeIndex + (double)parameter->getParH(lev)->sendProcessNeighborF3X[processNeighbor].memsizeGs + - (double)parameter->getParH(lev)->recvProcessNeighborF3X[processNeighbor].memsizeIndex + (double)parameter->getParH(lev)->recvProcessNeighborF3X[processNeighbor].memsizeGs; - setMemsizeGPU(tmp, false); -} -void CudaMemoryManager::cudaCopyProcessNeighborF3XIndex(int lev, unsigned int processNeighbor) -{ - //copy send Index - checkCudaErrors(cudaMemcpy( - parameter->getParD(lev)->sendProcessNeighborF3X[processNeighbor].index, - parameter->getParH(lev)->sendProcessNeighborF3X[processNeighbor].index, - parameter->getParH(lev)->sendProcessNeighborF3X[processNeighbor].memsizeIndex, - cudaMemcpyHostToDevice)); - //copy recv Index - checkCudaErrors(cudaMemcpy( - parameter->getParD(lev)->recvProcessNeighborF3X[processNeighbor].index, - parameter->getParH(lev)->recvProcessNeighborF3X[processNeighbor].index, - parameter->getParH(lev)->recvProcessNeighborF3X[processNeighbor].memsizeIndex, - cudaMemcpyHostToDevice)); -} -void CudaMemoryManager::cudaCopyProcessNeighborF3XFsHD(int lev, unsigned int processNeighbor) -{ - checkCudaErrors(cudaMemcpy( - parameter->getParD(lev)->recvProcessNeighborF3X[processNeighbor].g[0], - parameter->getParH(lev)->recvProcessNeighborF3X[processNeighbor].g[0], - parameter->getParD(lev)->recvProcessNeighborF3X[processNeighbor].memsizeGs, - cudaMemcpyHostToDevice)); -} -void CudaMemoryManager::cudaCopyProcessNeighborF3XFsDH(int lev, unsigned int processNeighbor) -{ - checkCudaErrors(cudaMemcpy( - parameter->getParH(lev)->sendProcessNeighborF3X[processNeighbor].g[0], - parameter->getParD(lev)->sendProcessNeighborF3X[processNeighbor].g[0], - parameter->getParD(lev)->sendProcessNeighborF3X[processNeighbor].memsizeGs, - cudaMemcpyDeviceToHost)); -} -void CudaMemoryManager::cudaFreeProcessNeighborF3X(int lev, unsigned int processNeighbor) -{ - checkCudaErrors(cudaFreeHost(parameter->getParH(lev)->sendProcessNeighborF3X[processNeighbor].index)); - checkCudaErrors(cudaFreeHost(parameter->getParH(lev)->sendProcessNeighborF3X[processNeighbor].g[0])); - checkCudaErrors(cudaFreeHost(parameter->getParH(lev)->recvProcessNeighborF3X[processNeighbor].index)); - checkCudaErrors(cudaFreeHost(parameter->getParH(lev)->recvProcessNeighborF3X[processNeighbor].g[0])); -} -// Y ///////////////////////////////////////////////////////////////////////////// -void CudaMemoryManager::cudaAllocProcessNeighborF3Y(int lev, unsigned int processNeighbor) -{ - //Host - checkCudaErrors(cudaMallocHost((void**) &(parameter->getParH(lev)->sendProcessNeighborF3Y[processNeighbor].index), parameter->getParH(lev)->sendProcessNeighborF3Y[processNeighbor].memsizeIndex)); - checkCudaErrors(cudaMallocHost((void**) &(parameter->getParH(lev)->sendProcessNeighborF3Y[processNeighbor].g[0]), parameter->getParH(lev)->sendProcessNeighborF3Y[processNeighbor].memsizeGs)); - checkCudaErrors(cudaMallocHost((void**) &(parameter->getParH(lev)->recvProcessNeighborF3Y[processNeighbor].index), parameter->getParH(lev)->recvProcessNeighborF3Y[processNeighbor].memsizeIndex)); - checkCudaErrors(cudaMallocHost((void**) &(parameter->getParH(lev)->recvProcessNeighborF3Y[processNeighbor].g[0]), parameter->getParH(lev)->recvProcessNeighborF3Y[processNeighbor].memsizeGs)); - - //Device - checkCudaErrors(cudaMalloc((void**) &(parameter->getParD(lev)->sendProcessNeighborF3Y[processNeighbor].index), parameter->getParD(lev)->sendProcessNeighborF3Y[processNeighbor].memsizeIndex)); - checkCudaErrors(cudaMalloc((void**) &(parameter->getParD(lev)->sendProcessNeighborF3Y[processNeighbor].g[0]), parameter->getParD(lev)->sendProcessNeighborF3Y[processNeighbor].memsizeGs)); - checkCudaErrors(cudaMalloc((void**) &(parameter->getParD(lev)->recvProcessNeighborF3Y[processNeighbor].index), parameter->getParD(lev)->recvProcessNeighborF3Y[processNeighbor].memsizeIndex)); - checkCudaErrors(cudaMalloc((void**) &(parameter->getParD(lev)->recvProcessNeighborF3Y[processNeighbor].g[0]), parameter->getParD(lev)->recvProcessNeighborF3Y[processNeighbor].memsizeGs)); - - //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - double tmp = - (double)parameter->getParH(lev)->sendProcessNeighborF3Y[processNeighbor].memsizeIndex + (double)parameter->getParH(lev)->sendProcessNeighborF3Y[processNeighbor].memsizeGs + - (double)parameter->getParH(lev)->recvProcessNeighborF3Y[processNeighbor].memsizeIndex + (double)parameter->getParH(lev)->recvProcessNeighborF3Y[processNeighbor].memsizeGs; - setMemsizeGPU(tmp, false); -} -void CudaMemoryManager::cudaCopyProcessNeighborF3YIndex(int lev, unsigned int processNeighbor) -{ - //copy send Index - checkCudaErrors(cudaMemcpy( - parameter->getParD(lev)->sendProcessNeighborF3Y[processNeighbor].index, - parameter->getParH(lev)->sendProcessNeighborF3Y[processNeighbor].index, - parameter->getParH(lev)->sendProcessNeighborF3Y[processNeighbor].memsizeIndex, - cudaMemcpyHostToDevice)); - //copy recv Index - checkCudaErrors(cudaMemcpy( - parameter->getParD(lev)->recvProcessNeighborF3Y[processNeighbor].index, - parameter->getParH(lev)->recvProcessNeighborF3Y[processNeighbor].index, - parameter->getParH(lev)->recvProcessNeighborF3Y[processNeighbor].memsizeIndex, - cudaMemcpyHostToDevice)); -} -void CudaMemoryManager::cudaCopyProcessNeighborF3YFsHD(int lev, unsigned int processNeighbor) -{ - checkCudaErrors(cudaMemcpy( - parameter->getParD(lev)->recvProcessNeighborF3Y[processNeighbor].g[0], - parameter->getParH(lev)->recvProcessNeighborF3Y[processNeighbor].g[0], - parameter->getParD(lev)->recvProcessNeighborF3Y[processNeighbor].memsizeGs, - cudaMemcpyHostToDevice)); -} -void CudaMemoryManager::cudaCopyProcessNeighborF3YFsDH(int lev, unsigned int processNeighbor) -{ - checkCudaErrors(cudaMemcpy( - parameter->getParH(lev)->sendProcessNeighborF3Y[processNeighbor].g[0], - parameter->getParD(lev)->sendProcessNeighborF3Y[processNeighbor].g[0], - parameter->getParD(lev)->sendProcessNeighborF3Y[processNeighbor].memsizeGs, - cudaMemcpyDeviceToHost)); -} -void CudaMemoryManager::cudaFreeProcessNeighborF3Y(int lev, unsigned int processNeighbor) -{ - checkCudaErrors(cudaFreeHost(parameter->getParH(lev)->sendProcessNeighborF3Y[processNeighbor].index)); - checkCudaErrors(cudaFreeHost(parameter->getParH(lev)->sendProcessNeighborF3Y[processNeighbor].g[0])); - checkCudaErrors(cudaFreeHost(parameter->getParH(lev)->recvProcessNeighborF3Y[processNeighbor].index)); - checkCudaErrors(cudaFreeHost(parameter->getParH(lev)->recvProcessNeighborF3Y[processNeighbor].g[0])); -} -// Z ///////////////////////////////////////////////////////////////////////////// -void CudaMemoryManager::cudaAllocProcessNeighborF3Z(int lev, unsigned int processNeighbor) -{ - //Host - checkCudaErrors(cudaMallocHost((void**) &(parameter->getParH(lev)->sendProcessNeighborF3Z[processNeighbor].index), parameter->getParH(lev)->sendProcessNeighborF3Z[processNeighbor].memsizeIndex)); - checkCudaErrors(cudaMallocHost((void**) &(parameter->getParH(lev)->sendProcessNeighborF3Z[processNeighbor].g[0]), parameter->getParH(lev)->sendProcessNeighborF3Z[processNeighbor].memsizeGs)); - checkCudaErrors(cudaMallocHost((void**) &(parameter->getParH(lev)->recvProcessNeighborF3Z[processNeighbor].index), parameter->getParH(lev)->recvProcessNeighborF3Z[processNeighbor].memsizeIndex)); - checkCudaErrors(cudaMallocHost((void**) &(parameter->getParH(lev)->recvProcessNeighborF3Z[processNeighbor].g[0]), parameter->getParH(lev)->recvProcessNeighborF3Z[processNeighbor].memsizeGs)); - //Device - checkCudaErrors(cudaMalloc((void**) &(parameter->getParD(lev)->sendProcessNeighborF3Z[processNeighbor].index), parameter->getParD(lev)->sendProcessNeighborF3Z[processNeighbor].memsizeIndex)); - checkCudaErrors(cudaMalloc((void**) &(parameter->getParD(lev)->sendProcessNeighborF3Z[processNeighbor].g[0]), parameter->getParD(lev)->sendProcessNeighborF3Z[processNeighbor].memsizeGs)); - checkCudaErrors(cudaMalloc((void**) &(parameter->getParD(lev)->recvProcessNeighborF3Z[processNeighbor].index), parameter->getParD(lev)->recvProcessNeighborF3Z[processNeighbor].memsizeIndex)); - checkCudaErrors(cudaMalloc((void**) &(parameter->getParD(lev)->recvProcessNeighborF3Z[processNeighbor].g[0]), parameter->getParD(lev)->recvProcessNeighborF3Z[processNeighbor].memsizeGs)); - //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - double tmp = - (double)parameter->getParH(lev)->sendProcessNeighborF3Z[processNeighbor].memsizeIndex + (double)parameter->getParH(lev)->sendProcessNeighborF3Z[processNeighbor].memsizeGs + - (double)parameter->getParH(lev)->recvProcessNeighborF3Z[processNeighbor].memsizeIndex + (double)parameter->getParH(lev)->recvProcessNeighborF3Z[processNeighbor].memsizeGs; - setMemsizeGPU(tmp, false); -} -void CudaMemoryManager::cudaCopyProcessNeighborF3ZIndex(int lev, unsigned int processNeighbor) -{ - //copy send Index - checkCudaErrors(cudaMemcpy( - parameter->getParD(lev)->sendProcessNeighborF3Z[processNeighbor].index, - parameter->getParH(lev)->sendProcessNeighborF3Z[processNeighbor].index, - parameter->getParH(lev)->sendProcessNeighborF3Z[processNeighbor].memsizeIndex, - cudaMemcpyHostToDevice)); - //copy recv Index - checkCudaErrors(cudaMemcpy( - parameter->getParD(lev)->recvProcessNeighborF3Z[processNeighbor].index, - parameter->getParH(lev)->recvProcessNeighborF3Z[processNeighbor].index, - parameter->getParH(lev)->recvProcessNeighborF3Z[processNeighbor].memsizeIndex, - cudaMemcpyHostToDevice)); -} -void CudaMemoryManager::cudaCopyProcessNeighborF3ZFsHD(int lev, unsigned int processNeighbor) -{ - checkCudaErrors(cudaMemcpy( - parameter->getParD(lev)->recvProcessNeighborF3Z[processNeighbor].g[0], - parameter->getParH(lev)->recvProcessNeighborF3Z[processNeighbor].g[0], - parameter->getParD(lev)->recvProcessNeighborF3Z[processNeighbor].memsizeGs, - cudaMemcpyHostToDevice)); -} -void CudaMemoryManager::cudaCopyProcessNeighborF3ZFsDH(int lev, unsigned int processNeighbor) -{ - checkCudaErrors(cudaMemcpy( - parameter->getParH(lev)->sendProcessNeighborF3Z[processNeighbor].g[0], - parameter->getParD(lev)->sendProcessNeighborF3Z[processNeighbor].g[0], - parameter->getParD(lev)->sendProcessNeighborF3Z[processNeighbor].memsizeGs, - cudaMemcpyDeviceToHost)); -} -void CudaMemoryManager::cudaFreeProcessNeighborF3Z(int lev, unsigned int processNeighbor) -{ - checkCudaErrors(cudaFreeHost(parameter->getParH(lev)->sendProcessNeighborF3Z[processNeighbor].index)); - checkCudaErrors(cudaFreeHost(parameter->getParH(lev)->sendProcessNeighborF3Z[processNeighbor].g[0])); - checkCudaErrors(cudaFreeHost(parameter->getParH(lev)->recvProcessNeighborF3Z[processNeighbor].index)); - checkCudaErrors(cudaFreeHost(parameter->getParH(lev)->recvProcessNeighborF3Z[processNeighbor].g[0])); -} -////////////////////////////////////////////////////////////////////////// void CudaMemoryManager::cudaAllocNeighborWSB(int lev) { diff --git a/src/gpu/core/Cuda/CudaMemoryManager.h b/src/gpu/core/Cuda/CudaMemoryManager.h index a23e56426afd530d03940b7b6b3af2140a61533f..999d43803fb90396c01c31457bae3c88ca0705c3 100644 --- a/src/gpu/core/Cuda/CudaMemoryManager.h +++ b/src/gpu/core/Cuda/CudaMemoryManager.h @@ -69,8 +69,6 @@ public: void cudaCopySP(int lev); void cudaFreeSP(int lev); - void cudaAllocF3SP(int lev); - void cudaAllocNeighborWSB(int lev); void cudaCopyNeighborWSB(int lev); void cudaFreeNeighborWSB(int lev); @@ -130,27 +128,6 @@ public: ////////////////////////////////////////////////////////////////////////// - ////////////////////////////////////////////////////////////////////////// - // 3D domain decomposition F3 - void cudaAllocProcessNeighborF3X(int lev, unsigned int processNeighbor); - void cudaCopyProcessNeighborF3XFsHD(int lev, unsigned int processNeighbor); - void cudaCopyProcessNeighborF3XFsDH(int lev, unsigned int processNeighbor); - void cudaCopyProcessNeighborF3XIndex(int lev, unsigned int processNeighbor); - void cudaFreeProcessNeighborF3X(int lev, unsigned int processNeighbor); - // - void cudaAllocProcessNeighborF3Y(int lev, unsigned int processNeighbor); - void cudaCopyProcessNeighborF3YFsHD(int lev, unsigned int processNeighbor); - void cudaCopyProcessNeighborF3YFsDH(int lev, unsigned int processNeighbor); - void cudaCopyProcessNeighborF3YIndex(int lev, unsigned int processNeighbor); - void cudaFreeProcessNeighborF3Y(int lev, unsigned int processNeighbor); - // - void cudaAllocProcessNeighborF3Z(int lev, unsigned int processNeighbor); - void cudaCopyProcessNeighborF3ZFsHD(int lev, unsigned int processNeighbor); - void cudaCopyProcessNeighborF3ZFsDH(int lev, unsigned int processNeighbor); - void cudaCopyProcessNeighborF3ZIndex(int lev, unsigned int processNeighbor); - void cudaFreeProcessNeighborF3Z(int lev, unsigned int processNeighbor); - ////////////////////////////////////////////////////////////////////////// - void cudaAllocTurbulentViscosity(int lev); void cudaCopyTurbulentViscosityHD(int lev); void cudaCopyTurbulentViscosityDH(int lev); diff --git a/src/gpu/core/DataStructureInitializer/GridReaderGenerator/GridGenerator.cpp b/src/gpu/core/DataStructureInitializer/GridReaderGenerator/GridGenerator.cpp index 2edb77c1b76cb3879a7cc0d111aeed4ab7063610..a1b63e9a10b3b277d436515a308f999e6d3b8251 100644 --- a/src/gpu/core/DataStructureInitializer/GridReaderGenerator/GridGenerator.cpp +++ b/src/gpu/core/DataStructureInitializer/GridReaderGenerator/GridGenerator.cpp @@ -735,208 +735,6 @@ void GridGenerator::initalValuesDomainDecompostion() } } } - - // data exchange for F3 / G6 - if ((para->getNumprocs() > 1) && (para->getIsF3())) { - for (int direction = 0; direction < 6; direction++) { - if (builder->getCommunicationProcess(direction) == INVALID_INDEX) - continue; - - for (uint level = 0; level < builder->getNumberOfGridLevels(); level++) { - if (direction == CommunicationDirections::MX || direction == CommunicationDirections::PX) { - int j = (int)para->getParH(level)->sendProcessNeighborF3X.size(); - - para->getParH(level)->sendProcessNeighborF3X.emplace_back(); - para->getParD(level)->sendProcessNeighborF3X.emplace_back(); - para->getParH(level)->recvProcessNeighborF3X.emplace_back(); - para->getParD(level)->recvProcessNeighborF3X.emplace_back(); - - int tempSend = builder->getNumberOfSendIndices(direction, level); - int tempRecv = builder->getNumberOfReceiveIndices(direction, level); - if (tempSend > 0) { - //////////////////////////////////////////////////////////////////////////////////////// - // send - VF_LOG_INFO("size of Data for X send buffer, \t\tLevel {}: {} \t(neighbor rank: {})", level, tempSend, builder->getCommunicationProcess(direction)); - //////////////////////////////////////////////////////////////////////////////////////// - para->getParH(level)->sendProcessNeighborF3X.back().rankNeighbor = - builder->getCommunicationProcess(direction); - //////////////////////////////////////////////////////////////////////////////////////// - para->getParH(level)->sendProcessNeighborF3X.back().numberOfNodes = tempSend; - para->getParD(level)->sendProcessNeighborF3X.back().numberOfNodes = tempSend; - para->getParH(level)->sendProcessNeighborF3X.back().numberOfGs = 6 * tempSend; - para->getParD(level)->sendProcessNeighborF3X.back().numberOfGs = 6 * tempSend; - para->getParH(level)->sendProcessNeighborF3X.back().memsizeIndex = - sizeof(unsigned int) * tempSend; - para->getParD(level)->sendProcessNeighborF3X.back().memsizeIndex = - sizeof(unsigned int) * tempSend; - para->getParH(level)->sendProcessNeighborF3X.back().memsizeGs = - sizeof(real) * para->getParH(level)->sendProcessNeighborF3X.back().numberOfGs; - para->getParD(level)->sendProcessNeighborF3X.back().memsizeGs = - sizeof(real) * para->getParH(level)->sendProcessNeighborF3X.back().numberOfGs; - //////////////////////////////////////////////////////////////////////////////////////// - // recv - VF_LOG_INFO("size of Data for X recv buffer, \t\tLevel {}: {} \t(neighbor rank: {})", level, tempRecv, builder->getCommunicationProcess(direction)); - //////////////////////////////////////////////////////////////////////////////////////// - para->getParH(level)->recvProcessNeighborF3X.back().rankNeighbor = - builder->getCommunicationProcess(direction); - //////////////////////////////////////////////////////////////////////////////////////// - para->getParH(level)->recvProcessNeighborF3X.back().numberOfNodes = tempRecv; - para->getParD(level)->recvProcessNeighborF3X.back().numberOfNodes = tempRecv; - para->getParH(level)->recvProcessNeighborF3X.back().numberOfGs = 6 * tempRecv; - para->getParD(level)->recvProcessNeighborF3X.back().numberOfGs = 6 * tempRecv; - para->getParH(level)->recvProcessNeighborF3X.back().memsizeIndex = - sizeof(unsigned int) * tempRecv; - para->getParD(level)->recvProcessNeighborF3X.back().memsizeIndex = - sizeof(unsigned int) * tempRecv; - para->getParH(level)->recvProcessNeighborF3X.back().memsizeGs = - sizeof(real) * para->getParH(level)->recvProcessNeighborF3X.back().numberOfGs; - para->getParD(level)->recvProcessNeighborF3X.back().memsizeGs = - sizeof(real) * para->getParH(level)->recvProcessNeighborF3X.back().numberOfGs; - //////////////////////////////////////////////////////////////////////////////////////// - // malloc on host and device - cudaMemoryManager->cudaAllocProcessNeighborF3X(level, j); - //////////////////////////////////////////////////////////////////////////////////////// - // init index arrays - builder->getSendIndices(para->getParH(level)->sendProcessNeighborF3X[j].index, direction, - level); - builder->getReceiveIndices(para->getParH(level)->recvProcessNeighborF3X[j].index, direction, - level); - //////////////////////////////////////////////////////////////////////////////////////// - cudaMemoryManager->cudaCopyProcessNeighborF3XIndex(level, j); - //////////////////////////////////////////////////////////////////////////////////////// - } - } - - if (direction == CommunicationDirections::MY || direction == CommunicationDirections::PY) { - int j = (int)para->getParH(level)->sendProcessNeighborF3Y.size(); - - para->getParH(level)->sendProcessNeighborF3Y.emplace_back(); - para->getParD(level)->sendProcessNeighborF3Y.emplace_back(); - para->getParH(level)->recvProcessNeighborF3Y.emplace_back(); - para->getParD(level)->recvProcessNeighborF3Y.emplace_back(); - - int tempSend = builder->getNumberOfSendIndices(direction, level); - int tempRecv = builder->getNumberOfReceiveIndices(direction, level); - if (tempSend > 0) { - //////////////////////////////////////////////////////////////////////////////////////// - // send - VF_LOG_INFO("size of Data for Y send buffer, \t\tLevel {}: {} \t(neighbor rank: {})", level, tempSend, builder->getCommunicationProcess(direction)); - //////////////////////////////////////////////////////////////////////////////////////// - para->getParH(level)->sendProcessNeighborF3Y.back().rankNeighbor = - builder->getCommunicationProcess(direction); - //////////////////////////////////////////////////////////////////////////////////////// - para->getParH(level)->sendProcessNeighborF3Y.back().numberOfNodes = tempSend; - para->getParD(level)->sendProcessNeighborF3Y.back().numberOfNodes = tempSend; - para->getParH(level)->sendProcessNeighborF3Y.back().numberOfGs = 6 * tempSend; - para->getParD(level)->sendProcessNeighborF3Y.back().numberOfGs = 6 * tempSend; - para->getParH(level)->sendProcessNeighborF3Y.back().memsizeIndex = - sizeof(unsigned int) * tempSend; - para->getParD(level)->sendProcessNeighborF3Y.back().memsizeIndex = - sizeof(unsigned int) * tempSend; - para->getParH(level)->sendProcessNeighborF3Y.back().memsizeGs = - sizeof(real) * para->getParH(level)->sendProcessNeighborF3Y.back().numberOfGs; - para->getParD(level)->sendProcessNeighborF3Y.back().memsizeGs = - sizeof(real) * para->getParH(level)->sendProcessNeighborF3Y.back().numberOfGs; - //////////////////////////////////////////////////////////////////////////////////////// - // recv - VF_LOG_INFO("size of Data for Y recv buffer, \t\tLevel {}: {} \t(neighbor rank: {})", level, tempRecv, builder->getCommunicationProcess(direction)); - //////////////////////////////////////////////////////////////////////////////////////// - para->getParH(level)->recvProcessNeighborF3Y.back().rankNeighbor = - builder->getCommunicationProcess(direction); - //////////////////////////////////////////////////////////////////////////////////////// - para->getParH(level)->recvProcessNeighborF3Y.back().numberOfNodes = tempRecv; - para->getParD(level)->recvProcessNeighborF3Y.back().numberOfNodes = tempRecv; - para->getParH(level)->recvProcessNeighborF3Y.back().numberOfGs = 6 * tempRecv; - para->getParD(level)->recvProcessNeighborF3Y.back().numberOfGs = 6 * tempRecv; - para->getParH(level)->recvProcessNeighborF3Y.back().memsizeIndex = - sizeof(unsigned int) * tempRecv; - para->getParD(level)->recvProcessNeighborF3Y.back().memsizeIndex = - sizeof(unsigned int) * tempRecv; - para->getParH(level)->recvProcessNeighborF3Y.back().memsizeGs = - sizeof(real) * para->getParH(level)->recvProcessNeighborF3Y.back().numberOfGs; - para->getParD(level)->recvProcessNeighborF3Y.back().memsizeGs = - sizeof(real) * para->getParH(level)->recvProcessNeighborF3Y.back().numberOfGs; - //////////////////////////////////////////////////////////////////////////////////////// - // malloc on host and device - cudaMemoryManager->cudaAllocProcessNeighborF3Y(level, j); - //////////////////////////////////////////////////////////////////////////////////////// - // init index arrays - builder->getSendIndices(para->getParH(level)->sendProcessNeighborF3Y[j].index, direction, - level); - builder->getReceiveIndices(para->getParH(level)->recvProcessNeighborF3Y[j].index, direction, - level); - //////////////////////////////////////////////////////////////////////////////////////// - cudaMemoryManager->cudaCopyProcessNeighborF3YIndex(level, j); - //////////////////////////////////////////////////////////////////////////////////////// - } - } - - if (direction == CommunicationDirections::MZ || direction == CommunicationDirections::PZ) { - int j = (int)para->getParH(level)->sendProcessNeighborF3Z.size(); - - para->getParH(level)->sendProcessNeighborF3Z.emplace_back(); - para->getParD(level)->sendProcessNeighborF3Z.emplace_back(); - para->getParH(level)->recvProcessNeighborF3Z.emplace_back(); - para->getParD(level)->recvProcessNeighborF3Z.emplace_back(); - - int tempSend = builder->getNumberOfSendIndices(direction, level); - int tempRecv = builder->getNumberOfReceiveIndices(direction, level); - if (tempSend > 0) { - //////////////////////////////////////////////////////////////////////////////////////// - // send - VF_LOG_INFO("size of Data for Z send buffer, \t\tLevel {}: {} \t(neighbor rank: {})", level, tempSend, builder->getCommunicationProcess(direction)); - //////////////////////////////////////////////////////////////////////////////////////// - para->getParH(level)->sendProcessNeighborF3Z.back().rankNeighbor = - builder->getCommunicationProcess(direction); - //////////////////////////////////////////////////////////////////////////////////////// - para->getParH(level)->sendProcessNeighborF3Z.back().numberOfNodes = tempSend; - para->getParD(level)->sendProcessNeighborF3Z.back().numberOfNodes = tempSend; - para->getParH(level)->sendProcessNeighborF3Z.back().numberOfGs = 6 * tempSend; - para->getParD(level)->sendProcessNeighborF3Z.back().numberOfGs = 6 * tempSend; - para->getParH(level)->sendProcessNeighborF3Z.back().memsizeIndex = - sizeof(unsigned int) * tempSend; - para->getParD(level)->sendProcessNeighborF3Z.back().memsizeIndex = - sizeof(unsigned int) * tempSend; - para->getParH(level)->sendProcessNeighborF3Z.back().memsizeGs = - sizeof(real) * para->getParH(level)->sendProcessNeighborF3Z.back().numberOfGs; - para->getParD(level)->sendProcessNeighborF3Z.back().memsizeGs = - sizeof(real) * para->getParH(level)->sendProcessNeighborF3Z.back().numberOfGs; - //////////////////////////////////////////////////////////////////////////////////////// - // recv - VF_LOG_INFO("size of Data for Z recv buffer, \t\tLevel {}: {} \t(neighbor rank: {})", level, tempRecv, builder->getCommunicationProcess(direction)); - //////////////////////////////////////////////////////////////////////////////////////// - para->getParH(level)->recvProcessNeighborF3Z.back().rankNeighbor = - builder->getCommunicationProcess(direction); - //////////////////////////////////////////////////////////////////////////////////////// - para->getParH(level)->recvProcessNeighborF3Z.back().numberOfNodes = tempRecv; - para->getParD(level)->recvProcessNeighborF3Z.back().numberOfNodes = tempRecv; - para->getParH(level)->recvProcessNeighborF3Z.back().numberOfGs = 6 * tempRecv; - para->getParD(level)->recvProcessNeighborF3Z.back().numberOfGs = 6 * tempRecv; - para->getParH(level)->recvProcessNeighborF3Z.back().memsizeIndex = - sizeof(unsigned int) * tempRecv; - para->getParD(level)->recvProcessNeighborF3Z.back().memsizeIndex = - sizeof(unsigned int) * tempRecv; - para->getParH(level)->recvProcessNeighborF3Z.back().memsizeGs = - sizeof(real) * para->getParH(level)->recvProcessNeighborF3Z.back().numberOfGs; - para->getParD(level)->recvProcessNeighborF3Z.back().memsizeGs = - sizeof(real) * para->getParH(level)->recvProcessNeighborF3Z.back().numberOfGs; - //////////////////////////////////////////////////////////////////////////////////////// - // malloc on host and device - cudaMemoryManager->cudaAllocProcessNeighborF3Z(level, j); - //////////////////////////////////////////////////////////////////////////////////////// - // init index arrays - builder->getSendIndices(para->getParH(level)->sendProcessNeighborF3Z[j].index, direction, - level); - builder->getReceiveIndices(para->getParH(level)->recvProcessNeighborF3Z[j].index, direction, - level); - //////////////////////////////////////////////////////////////////////////////////////// - cudaMemoryManager->cudaCopyProcessNeighborF3ZIndex(level, j); - //////////////////////////////////////////////////////////////////////////////////////// - } - } - } - } - } } void GridGenerator::allocArrays_BoundaryQs() diff --git a/src/gpu/core/Parameter/Parameter.cpp b/src/gpu/core/Parameter/Parameter.cpp index 79f3b77be164e95f365ce8cf71b8efd2a41ce93f..2eca26826ffa8cc59d669e8a657d21de8c52449f 100644 --- a/src/gpu/core/Parameter/Parameter.cpp +++ b/src/gpu/core/Parameter/Parameter.cpp @@ -945,11 +945,6 @@ void Parameter::setHasWallModelMonitor(bool hasWallModelMonitor) this->hasWallModelMonitor = hasWallModelMonitor; } -void Parameter::setIsF3(bool isF3) -{ - this->isF3 = isF3; -} - void Parameter::setIsBodyForce(bool isBodyForce) { this->isBodyForce = isBodyForce; @@ -2284,11 +2279,6 @@ bool Parameter::getUseInitNeq() return this->isInitNeq; } -bool Parameter::getIsF3() -{ - return this->isF3; -} - bool Parameter::getIsBodyForce() { return this->isBodyForce; diff --git a/src/gpu/core/Parameter/Parameter.h b/src/gpu/core/Parameter/Parameter.h index 6e27f33d2b3edc04a4aebf10d289765b3d0a5370..504733a02e76b1d7aa7968310695e8d35154b2bf 100644 --- a/src/gpu/core/Parameter/Parameter.h +++ b/src/gpu/core/Parameter/Parameter.h @@ -198,14 +198,6 @@ struct LBMSimulationParameter { std::vector<ProcessNeighbor27> recvProcessNeighborADX; std::vector<ProcessNeighbor27> recvProcessNeighborADY; std::vector<ProcessNeighbor27> recvProcessNeighborADZ; - /////////////////////////////////////////////////////// - // 3D domain decomposition F3 - std::vector<ProcessNeighborF3> sendProcessNeighborF3X; - std::vector<ProcessNeighborF3> sendProcessNeighborF3Y; - std::vector<ProcessNeighborF3> sendProcessNeighborF3Z; - std::vector<ProcessNeighborF3> recvProcessNeighborF3X; - std::vector<ProcessNeighborF3> recvProcessNeighborF3Y; - std::vector<ProcessNeighborF3> recvProcessNeighborF3Z; //////////////////////////////////////////////////////////////////////////// // 3D domain decomposition: position (index in array) of corner nodes in ProcessNeighbor27 struct EdgeNodePositions { @@ -249,9 +241,6 @@ struct LBMSimulationParameter { // ADD IN FUTURE RELEASE ////////////////////////////////////////////////////////////////////////// - // distributions F3//////// - Distributions6 g6; - // BC NoSlip TempforBoundaryConditions Temp; // BC Velocity @@ -594,7 +583,6 @@ public: void setSGSConstant(real SGSConstant); void setHasWallModelMonitor(bool hasWallModelMonitor); void setUseInitNeq(bool useInitNeq); - void setIsF3(bool isF3); void setIsBodyForce(bool isBodyForce); void setclockCycleForMP(real clockCycleForMP); void setDevices(std::vector<uint> devices); @@ -874,7 +862,6 @@ public: real getSGSConstant(); bool getHasWallModelMonitor(); bool getUseInitNeq(); - bool getIsF3(); bool getIsBodyForce(); double getMemsizeGPU(); // 1D domain decomposition @@ -965,7 +952,6 @@ private: bool compOn{ false }; bool diffOn{ false }; - bool isF3{ false }; bool calcDragLift{ false }; bool calcCp{ false }; bool writeVeloASCII{ false };