diff --git a/.gitignore b/.gitignore
index 578d3f91a2de6cac430328063dc90e85611961e1..d16238c4ac7d45440117af9bc7ac0479a90dae2d 100644
--- a/.gitignore
+++ b/.gitignore
@@ -21,12 +21,6 @@ __pycache__/
 output/
 logs/
 
-# Input files
-stl/
-
-# Scripts
-scripts/
-
 # scripts
 scripts/
diff --git a/apps/gpu/LBM/DrivenCavityMultiGPU/CMakeLists.txt b/apps/gpu/LBM/DrivenCavityMultiGPU/CMakeLists.txt
index dba88229da2d2fb66dd893f058ece95005fa60e1..51b8db1edf126ebe7e2f3d5808496121270433c5 100644
--- a/apps/gpu/LBM/DrivenCavityMultiGPU/CMakeLists.txt
+++ b/apps/gpu/LBM/DrivenCavityMultiGPU/CMakeLists.txt
@@ -5,6 +5,4 @@ vf_add_library(BUILDTYPE binary PRIVATE_LINK basics VirtualFluids_GPU GridGenera
 set_source_files_properties(DrivenCavityMultiGPU.cpp PROPERTIES LANGUAGE CUDA)
 
 set_target_properties(DrivenCavityMultiGPU PROPERTIES
-    CUDA_SEPARABLE_COMPILATION ON
-    VS_DEBUGGER_COMMAND "C:/Program Files/Microsoft MPI/Bin/mpiexec.exe"
-    VS_DEBUGGER_COMMAND_ARGUMENTS "-n 2 \"$<TARGET_FILE:DrivenCavityMultiGPU>\"")
\ No newline at end of file
+    CUDA_SEPARABLE_COMPILATION ON)
\ No newline at end of file
diff --git a/apps/gpu/LBM/MusselOyster/CMakeLists.txt b/apps/gpu/LBM/MusselOyster/CMakeLists.txt
index 2dbbd7a7fcb11b8f619fdcc6bcf5da1b716950b7..595d9ff7250d984f80e8d0d54dad0b11ae7e71e2 100644
--- a/apps/gpu/LBM/MusselOyster/CMakeLists.txt
+++ b/apps/gpu/LBM/MusselOyster/CMakeLists.txt
@@ -5,6 +5,4 @@ vf_add_library(BUILDTYPE binary PRIVATE_LINK basics VirtualFluids_GPU GridGenera
 set_source_files_properties(MusselOyster.cpp PROPERTIES LANGUAGE CUDA)
 
 set_target_properties(MusselOyster PROPERTIES
-    CUDA_SEPARABLE_COMPILATION ON
-    VS_DEBUGGER_COMMAND "C:/Program Files/Microsoft MPI/Bin/mpiexec.exe"
-    VS_DEBUGGER_COMMAND_ARGUMENTS "-n 2 \"$<TARGET_FILE:MusselOyster>\"")
\ No newline at end of file
+    CUDA_SEPARABLE_COMPILATION ON)
\ No newline at end of file
diff --git a/apps/gpu/LBM/MusselOyster/MusselOysterMPI.bat b/apps/gpu/LBM/MusselOyster/MusselOysterMPI.bat
deleted file mode 100644
index c574cd5217bc46eb687d81ad2bedb2df53d295fc..0000000000000000000000000000000000000000
--- a/apps/gpu/LBM/MusselOyster/MusselOysterMPI.bat
+++ /dev/null
@@ -1,4 +0,0 @@
-:: don't close cmd on error
-if not defined in_subprocess (cmd /k set in_subprocess=y ^& %0 %*) & exit )
-:: @ECHO OFF
-mpiexec -n 2 C:\Users\Master\Documents\MasterAnna\VirtualFluids_dev\build\bin\Release\MusselOyster.exe
\ No newline at end of file
diff --git a/apps/gpu/LBM/SphereScaling/CMakeLists.txt b/apps/gpu/LBM/SphereScaling/CMakeLists.txt
index 49bee20f7cfc3561c62cf1b36c2f2992e7baada8..db3747f2b620cab1efc5cf50f02aee1a8fee4a54 100644
--- a/apps/gpu/LBM/SphereScaling/CMakeLists.txt
+++ b/apps/gpu/LBM/SphereScaling/CMakeLists.txt
@@ -5,6 +5,4 @@ vf_add_library(BUILDTYPE binary PRIVATE_LINK basics VirtualFluids_GPU GridGenera
 set_source_files_properties(SphereScaling.cpp PROPERTIES LANGUAGE CUDA)
 
 set_target_properties(SphereScaling PROPERTIES
-    CUDA_SEPARABLE_COMPILATION ON)
-    # VS_DEBUGGER_COMMAND "C:/Program Files/Microsoft MPI/Bin/mpiexec.exe"
-    # VS_DEBUGGER_COMMAND_ARGUMENTS "-n 2 \"$<TARGET_FILE:SphereScaling>\"")
\ No newline at end of file
+    CUDA_SEPARABLE_COMPILATION ON)
\ No newline at end of file
diff --git a/gpu.cmake b/gpu.cmake
index 2bf803ab7e3725dc648074fe1c250cdc49c1f662..5b70ed9da9cb5f0ac56d09a0f91f0a6b6d13b89a 100644
--- a/gpu.cmake
+++ b/gpu.cmake
@@ -31,14 +31,14 @@ IF (BUILD_VF_GPU)
     #add_subdirectory(targets/apps/LBM/BaselNU)
     #add_subdirectory(targets/apps/LBM/BaselMultiGPU)
-    #add_subdirectory(apps/gpu/LBM/DrivenCavity)
+    add_subdirectory(apps/gpu/LBM/DrivenCavity)
 
     #add_subdirectory(apps/gpu/LBM/WTG_RUB)
     #add_subdirectory(apps/gpu/LBM/gridGeneratorTest)
     #add_subdirectory(apps/gpu/LBM/TGV_3D)
     #add_subdirectory(apps/gpu/LBM/TGV_3D_MultiGPU)
-    add_subdirectory(apps/gpu/LBM/SphereScaling)
-    add_subdirectory(apps/gpu/LBM/DrivenCavityMultiGPU)
-    add_subdirectory(apps/gpu/LBM/MusselOyster)
+    #add_subdirectory(apps/gpu/LBM/SphereScaling)
+    #add_subdirectory(apps/gpu/LBM/DrivenCavityMultiGPU)
+    #add_subdirectory(apps/gpu/LBM/MusselOyster)
     #add_subdirectory(apps/gpu/LBM/Poiseuille)
     #add_subdirectory(apps/gpu/LBM/ActuatorLine)
     #add_subdirectory(apps/gpu/LBM/BoundaryLayer)
diff --git a/src/gpu/VirtualFluids_GPU/Communication/ExchangeData27.h b/src/gpu/VirtualFluids_GPU/Communication/ExchangeData27.h
index c6116ea37e6a6b17c7c3ded73d3e8478f07c41da..c8f281702f1b8121dfb9f7d3e0d0343f9a0374c4 100644
--- a/src/gpu/VirtualFluids_GPU/Communication/ExchangeData27.h
+++ b/src/gpu/VirtualFluids_GPU/Communication/ExchangeData27.h
@@ -10,7 +10,7 @@
 //! \file ExchangeData27.h
 //! \ingroup GPU
 //! \author Martin Schoenherr, Anna Wellmann
-//! \brief routines for data exchange when running simulations on multiple GPUs
+//! \brief Routines for data exchange when running simulations on multiple GPUs
 //////////////////////////////////////////////////////////////////////////
 
 // 1D domain decomposition
@@ -23,19 +23,19 @@ extern "C" void exchangePostCollDataGPU27(Parameter *para, vf::gpu::Communicator
 
 // functions used for all directions
 
-//! \brief collect the send nodes in a buffer on the gpu
+//! \brief Collect the send nodes in a buffer on the gpu
 extern "C" void collectNodesInSendBufferGPU(Parameter *para, int level, int streamIndex,
                                             std::vector<ProcessNeighbor27> *sendProcessNeighbor,
                                             unsigned int numberOfSendProcessNeighbors);
-//! \brief distribute the receive nodes from the buffer on the gpu
+//! \brief Distribute the receive nodes from the buffer on the gpu
extern "C" void scatterNodesFromRecvBufferGPU(Parameter *para, int level, int streamIndex,
                                               std::vector<ProcessNeighbor27> *recvProcessNeighborDev,
                                               unsigned int numberOfRecvProcessNeighbors);
-//! \brief copy nodes which are part of the communication in multiple directions
+//! \brief Copy nodes which are part of the communication in multiple directions
 //! \details The nodes are copied from the receive buffer in one direction to the send buffer in another direction. The
 //! copy operation is conducted on the cpu.
-//! \ref see master thesis of Anna Wellmann (p. 56f: "Communication Hiding bei
-//! der Verwendung eines uniformen Simulationsgitters")
+//! See [master thesis of Anna Wellmann (p. 56f: "Communication Hiding bei
+//! der Verwendung eines uniformen Simulationsgitters")]
 //! \param edgeNodes determines from where to where the nodes are
 //! copied
 //! \param recvProcessNeighborHost is a reference to the receive buffer on the host, nodes are copied from here
@@ -47,16 +47,16 @@ extern "C" void copyEdgeNodes(std::vector<LBMSimulationParameter::EdgeNodePositi
 
 //////////////////////////////////////////////////////////////////////////
 // x
-//! \brief collect the send nodes for communication in the x direction in a buffer on the gpu
-//! \details needed to exchange all nodes, used in the communication after collision step
+//! \brief Collect the send nodes for communication in the x direction in a buffer on the gpu
+//! \details Needed to exchange all nodes, used in the communication after collision step
 extern "C" void prepareExchangeCollDataXGPU27AllNodes(Parameter *para, int level, int streamIndex);
-//! \brief collect the send nodes for communication in the x direction in a buffer on the gpu
+//! \brief Collect the send nodes for communication in the x direction in a buffer on the gpu
 //! \details Only exchange nodes which are part of the interpolation process on refined grids. This function is used in
 //! the exchange which takes place after the interpolation fine to coarse and before the interpolation coarse to fine.
-//! \ref see master thesis of Anna Wellmann
+//! See [master thesis of Anna Wellmann]
 extern "C" void prepareExchangeCollDataXGPU27AfterFtoC(Parameter *para, int level, int streamIndex);
-//! \brief exchange routine in x direction for simulations on multiple gpus
-//! \details send and receive the nodes from the communication buffers on the gpus
+//! \brief Exchange routine in x direction for simulations on multiple gpus
+//! \details Send and receive the nodes from the communication buffers on the gpus.
 //! \param Communicator is needed for the communication between the processes with mpi
 //! \param CudaMemoryManager is needed for moving the data between host and device
 //! \param streamIndex is the index of a CUDA Stream, which is needed for communication hiding
@@ -68,23 +68,24 @@ extern "C" void exchangeCollDataXGPU27(Parameter *para, vf::gpu::Communicator &c
                                        std::vector<ProcessNeighbor27> *recvProcessNeighborDev,
                                        std::vector<ProcessNeighbor27> *sendProcessNeighborHost,
                                        std::vector<ProcessNeighbor27> *recvProcessNeighborHost);
-//! \brief calls exchangeCollDataXGPU27() for exchanging all nodes
-//! \details used in the communication after collision step
+//! \brief Calls exchangeCollDataXGPU27() for exchanging all nodes
+//! \details Used in the communication after collision step
 extern "C" void exchangeCollDataXGPU27AllNodes(Parameter *para, vf::gpu::Communicator &comm,
                                                CudaMemoryManager *cudaManager, int level, int streamIndex);
-//! \brief calls exchangeCollDataXGPU27() for exchanging the nodes, which are part of the communication between the two
-//! interpolation processes on refined grids \details Only exchange nodes which are part of the interpolation process on
+//! \brief Calls exchangeCollDataXGPU27() for exchanging the nodes which are part of the communication between the two
+//! interpolation processes on refined grids
+//! \details Only exchange nodes which are part of the interpolation process on
 //! refined grids. This function is used in the exchange which takes place after the interpolation fine to coarse and
-//! before the interpolation coarse to fine. \ref see master thesis of Anna Wellmann
+//! before the interpolation coarse to fine. See [master thesis of Anna Wellmann]
 extern "C" void exchangeCollDataXGPU27AfterFtoC(Parameter *para, vf::gpu::Communicator &comm,
                                                 CudaMemoryManager *cudaManager, int level, int streamIndex);
-//! \brief distribute the receive nodes (x direction) from the buffer on the gpu
-//! \details needed to exchange all nodes, used in the communication after collision step
+//! \brief Distribute the receive nodes (x direction) from the buffer on the gpu
+//! \details Needed to exchange all nodes, used in the communication after collision step
 extern "C" void scatterNodesFromRecvBufferXGPU27AllNodes(Parameter *para, int level, int streamIndex);
-//! \brief distribute the receive nodes (x direction) from the buffer on the gpu
+//! \brief Distribute the receive nodes (x direction) from the buffer on the gpu
 //! \details Only exchange nodes which are part of the interpolation process on refined grids. This function is used in
 //! the exchange which takes place after the interpolation fine to coarse and before the interpolation coarse to fine.
-//! \ref see master thesis of Anna Wellmann
+//! See [master thesis of Anna Wellmann]
 extern "C" void scatterNodesFromRecvBufferXGPU27AfterFtoC(Parameter *para, int level, int streamIndex);
 
 //////////////////////////////////////////////////////////////////////////
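For orientation, here is a minimal sketch of how the x-direction routines above would presumably compose into one full exchange of all nodes after the collision step. Only the function names and signatures are taken from ExchangeData27.h; the wrapper function and the exact call order are assumptions inferred from the doc comments (in particular, whether the exchange call already scatters internally is not visible from the header alone):

```cpp
#include "Communication/ExchangeData27.h" // path as in this patch

// Hypothetical composition of the declared routines for one x-direction
// exchange of all nodes between two GPUs/processes.
void exchangeXAllNodes(Parameter *para, vf::gpu::Communicator &comm,
                       CudaMemoryManager *cudaManager, int level, int streamIndex)
{
    // collect the send nodes in the buffer on the GPU
    prepareExchangeCollDataXGPU27AllNodes(para, level, streamIndex);
    // move buffers between host and device and exchange them via MPI
    exchangeCollDataXGPU27AllNodes(para, comm, cudaManager, level, streamIndex);
    // distribute the received nodes from the buffer back into the grid
    scatterNodesFromRecvBufferXGPU27AllNodes(para, level, streamIndex);
}
```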
diff --git a/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderGenerator/IndexRearrangementForStreams.h b/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderGenerator/IndexRearrangementForStreams.h
index a58f320a0e72e1ac5b14eb8b3d1ccb3a7766d2a3..3c68ef7c57bc37b9c1f713df1052d483bee3bda1 100644
--- a/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderGenerator/IndexRearrangementForStreams.h
+++ b/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderGenerator/IndexRearrangementForStreams.h
@@ -1,7 +1,7 @@
 //! \file IndexRearrangementForStreams.h
 //! \ingroup GPU
 //! \author Anna Wellmann
-//! \ref master thesis of Anna Wellmann
+//! \details See [master thesis of Anna Wellmann]
 
 #ifndef IndexRearrangementForStreams_H
 #define IndexRearrangementForStreams_H
@@ -16,39 +16,30 @@ class Parameter;
 class GridBuilder;
 
-namespace vf
-{
-namespace gpu
+namespace vf::gpu
 {
 class Communicator;
 }
-} // namespace vf
 
 class IndexRearrangementForStreams
 {
-private:
-    std::shared_ptr<GridBuilder> builder;
-    std::shared_ptr<Parameter> para;
-    vf::gpu::Communicator& communicator;
-
 public:
-    //! \brief construct IndexRearrangementForStreams object
+    //! \brief Construct IndexRearrangementForStreams object
     IndexRearrangementForStreams(std::shared_ptr<Parameter> para, std::shared_ptr<GridBuilder> builder,
                                  vf::gpu::Communicator& communicator);
 
     //////////////////////////////////////////////////////////////////////////
     // communication after coarse to fine
     //////////////////////////////////////////////////////////////////////////
 
-    //! \brief initialize the arrays for the communication after the interpolation from fine to coarse in x direction
+    //! \brief Initialize the arrays for the communication after the interpolation from fine to coarse in x direction
     //! \details Only the nodes involved in the interpolation need to be exchanged. Therefore in this method all nodes,
     //! which are part of the interpolation as well as the communication, are identified.
-    //!
-    //! \ref see master thesis of Anna
-    //! Wellmann (p. 59-62: "Reduzieren der auszutauschenden Knoten")
+    //! See [master thesis of Anna Wellmann (p. 59-62: "Reduzieren der auszutauschenden Knoten")]
     void initCommunicationArraysForCommAfterFinetoCoarseX(const uint &level, int j, int direction);
-    //! \brief initialize the arrays for the communication after the interpolation from fine to coarse in y direction
+    //! \brief Initialize the arrays for the communication after the interpolation from fine to coarse in y direction
     //! \details --> see x direction
     void initCommunicationArraysForCommAfterFinetoCoarseY(const uint &level, int j, int direction);
-    //! \brief initialize the arrays for the communication after the interpolation from fine to coarse in z direction
+    //! \brief Initialize the arrays for the communication after the interpolation from fine to coarse in z direction
     //! \details --> see x direction
     void initCommunicationArraysForCommAfterFinetoCoarseZ(const uint &level, int j, int direction);
 
@@ -57,7 +48,7 @@ public:
     // split interpolation cells
     //////////////////////////////////////////////////////////////////////////
 
-    //! \brief split the interpolation cells from coarse to fine into border an bulk
+    //! \brief Split the interpolation cells from coarse to fine into border and bulk
     //! \details For communication hiding, the interpolation cells from the coarse to the fine grid need to be split
     //! into two groups:
     //!
@@ -65,10 +56,10 @@ public:
     //!
     //! - the other cells which are not directly related to the communication between the two gpus --> "bulk"
     //!
-    //! \ref see master thesis of Anna Wellmann (p. 62-68: "Überdeckung der reduzierten Kommunikation")
+    //! See [master thesis of Anna Wellmann (p. 62-68: "Überdeckung der reduzierten Kommunikation")]
     void splitCoarseToFineIntoBorderAndBulk(const uint &level);
 
-    //! \brief split the interpolation cells from fine to coarse into border an bulk
+    //! \brief Split the interpolation cells from fine to coarse into border and bulk
     //! \details For communication hiding, the interpolation cells from the fine to the coarse grid need to be split
     //! into two groups:
     //!
@@ -76,7 +67,7 @@ public:
     //!
     //! - the other cells which are not directly related to the communication between the two gpus --> "bulk"
     //!
-    //! \ref see master thesis of Anna Wellmann (p. 62-68: "Überdeckung der reduzierten Kommunikation")
+    //! See [master thesis of Anna Wellmann (p. 62-68: "Überdeckung der reduzierten Kommunikation")]
     void splitFineToCoarseIntoBorderAndBulk(const uint &level);
 
 private:
@@ -84,7 +75,7 @@ private:
     //////////////////////////////////////////////////////////////////////////
     // communication after coarse to fine
     //////////////////////////////////////////////////////////////////////////
 
-    //! \brief inits pointers for reduced communication after interpolation fine to coarse by copying them from "normal"
+    //! \brief Initializes pointers for reduced communication after interpolation fine to coarse by copying them from "normal"
     //! communication
     void copyProcessNeighborToCommAfterFtoCX(const uint &level, int indexOfProcessNeighbor);
     void copyProcessNeighborToCommAfterFtoCY(const uint &level, int indexOfProcessNeighbor);
@@ -97,7 +88,7 @@ private:
     void reorderSendIndicesForCommAfterFtoCZ(int direction, int level, int indexOfProcessNeighbor,
                                              std::vector<uint> &sendIndicesForCommAfterFtoCPositions);
 
-    //! \brief the send indices are reordered for the communication after the interpolation from fine to coarse
+    //! \brief The send indices are reordered for the communication after the interpolation from fine to coarse
     //! \details The indices of nodes which are part of the interpolation are moved to the front of vector with the send
     //! indices.
     //! \pre para->getParH(level)->intCF needs to be inititalized
@@ -106,22 +97,22 @@ private:
     //! \param sendIndicesForCommAfterFtoCPositions stores each sendIndex's positions before reordering
     void reorderSendIndicesForCommAfterFtoC(int *sendIndices, int &numberOfSendNodesAfterFtoC, int direction, int level,
                                             std::vector<uint> &sendIndicesForCommAfterFtoCPositions);
-    //! \brief check if a sparse index occurs in the ICellFCC
+    //! \brief Check if a sparse index occurs in the ICellFCC
     bool isSparseIndexInICellFCC(uint sizeOfICellFCC, int sparseIndexSend, int level);
-    //! \brief aggregate all nodes in the coarse cells for the interpolation in coarse to fine
+    //! \brief Aggregate all nodes in the coarse cells for the interpolation in coarse to fine
     //! \details For the coarse cells in the interpolation from coarse to fine only one node is stored. This methods
     //! looks for the other nodes of each cell and puts them into vector. Duplicate nodes are only stored once.
     void aggregateNodesInICellCFC(int level, std::vector<uint> &nodesCFC);
-    //! \brief add index to sendIndicesAfterFtoC and sendIndicesForCommAfterFtoCPositions, but omit indices which are already in sendIndicesAfterFtoC
+    //! \brief Add index to sendIndicesAfterFtoC and sendIndicesForCommAfterFtoCPositions, but omit indices which are already in sendIndicesAfterFtoC
     void addUniqueIndexToCommunicationVectors(std::vector<int> &sendIndicesAfterFtoC, int &sparseIndexSend,
                                               std::vector<unsigned int> &sendIndicesForCommAfterFtoCPositions,
                                               uint &posInSendIndices) const;
-    //! \brief find if a sparse index is a send index. If true, call addUniqueIndexToCommunicationVectors()
+    //! \brief Find if a sparse index is a send index. If true, call addUniqueIndexToCommunicationVectors()
     void findIfSparseIndexIsInSendIndicesAndAddToCommVectors(int sparseIndex, int *sendIndices,
                                                              uint numberOfSendIndices,
                                                              std::vector<int> &sendIndicesAfterFtoC,
                                                              std::vector<uint> &sendIndicesForCommAfterFtoCPositions) const;
-    //! \brief find all indices which are not part of the communication after the interpolation from fine to coarse
+    //! \brief Find all indices which are not part of the communication after the interpolation from fine to coarse
     void findIndicesNotInCommAfterFtoC(const uint &numberOfSendOrRecvIndices, int *sendOrReceiveIndices,
                                        std::vector<int> &sendOrReceiveIndicesAfterFtoC,
                                        std::vector<int> &sendOrIndicesOther);
@@ -133,7 +124,7 @@ private:
     void reorderRecvIndicesForCommAfterFtoCZ(int direction, int level, int indexOfProcessNeighbor,
                                              std::vector<uint> &sendIndicesForCommAfterFtoCPositions);
 
-    //! \brief reorder the receive indices in the same way that the send indices were reordered.
+    //! \brief Reorder the receive indices in the same way that the send indices were reordered.
     //! \details When the send indices are reordered, the receive indices need to be reordered accordingly.
     //! \pre sendIndicesForCommAfterFtoCPositions should not be empty
     //! \param recvIndices is the pointer to the vector with the receive indices, which will be reordered in this function
@@ -159,6 +150,11 @@ private:
     void getGridInterfaceIndicesBorderBulkFC(int level);
 
+private:
+    std::shared_ptr<GridBuilder> builder;
+    std::shared_ptr<Parameter> para;
+    vf::gpu::Communicator& communicator;
+
     // used for tests
     friend class IndexRearrangementForStreamsTest_reorderSendIndices;
 };
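The reordering documented above hinges on collecting each send index at most once. As a point of reference, a self-contained sketch of that "add unique index" bookkeeping, with a simplified free-standing signature (the real addUniqueIndexToCommunicationVectors() works on the class members and additionally tracks the sparse index by reference):

```cpp
#include <algorithm>
#include <vector>

// Simplified stand-in for addUniqueIndexToCommunicationVectors(): append a
// send index and its position in the original send indices only if that index
// has not been collected before, so duplicates are stored once.
void addUniqueIndex(std::vector<int> &sendIndicesAfterFtoC, int sparseIndexSend,
                    std::vector<unsigned int> &sendIndicesForCommAfterFtoCPositions,
                    unsigned int posInSendIndices)
{
    if (std::find(sendIndicesAfterFtoC.begin(), sendIndicesAfterFtoC.end(),
                  sparseIndexSend) == sendIndicesAfterFtoC.end()) {
        sendIndicesAfterFtoC.push_back(sparseIndexSend);
        sendIndicesForCommAfterFtoCPositions.push_back(posInSendIndices);
    }
}
```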
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/KernelImp.cpp b/src/gpu/VirtualFluids_GPU/Kernel/KernelImp.cpp
index e6960cc18ff7905fdcc351f3396a7a0948243dbf..3151e6bedeb6a96666f11f0040de2c95b20cc42c 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/KernelImp.cpp
+++ b/src/gpu/VirtualFluids_GPU/Kernel/KernelImp.cpp
@@ -27,25 +27,6 @@ void KernelImp::setCheckParameterStrategy(std::shared_ptr<CheckParameterStrategy
     this->checkStrategy = strategy;
 }
 
-
-
 KernelImp::KernelImp(std::shared_ptr<Parameter> para, int level) : para(para), level(level) {}
-KernelImp::KernelImp() {}
-
-std::unique_ptr<std::pair<dim3, dim3>> KernelImp::calcGridDimensions(unsigned int size_Mat, int numberOfThreads)
-{
-    int Grid = (size_Mat / numberOfThreads) + 1;
-    int Grid1, Grid2;
-    if (Grid > 512) {
-        Grid1 = 512;
-        Grid2 = (Grid / Grid1) + 1;
-    } else {
-        Grid1 = 1;
-        Grid2 = Grid;
-    }
-    dim3 grid(Grid1, Grid2);
-    dim3 threads(numberOfThreads, 1, 1);
-    std::pair<dim3, dim3> dimensions(grid, threads);
-    return std::make_unique<std::pair<dim3, dim3>>(dimensions);
-}
+KernelImp::KernelImp() {}
\ No newline at end of file
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/KernelImp.h b/src/gpu/VirtualFluids_GPU/Kernel/KernelImp.h
index 1fb3d51a293b98af5f6f7880b46195c060f1fc7e..cba3540905df0314d6ce1eb6f0a1eab8d4a5a4c4 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/KernelImp.h
+++ b/src/gpu/VirtualFluids_GPU/Kernel/KernelImp.h
@@ -33,8 +33,6 @@ protected:
     KernelGroup myKernelGroup;
 
     vf::cuda::CudaGrid cudaGrid;
-
-    std::unique_ptr<std::pair<dim3, dim3>> calcGridDimensions(unsigned int size_Mat, int numberOfThreads);
 };
 
 #endif
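For reference, the launch configuration that the removed calcGridDimensions() computed, in a free-standing form. vf::cuda::CudaGrid presumably encapsulates an equivalent computation, but its implementation is not part of this diff; note also that the CudaGrid constructor, as used in the kernel classes below, takes the thread count first and the matrix size second.

```cpp
#include <utility>
#include <vector_types.h> // dim3 (CUDA)

// Free-standing version of the removed KernelImp::calcGridDimensions():
// one block per 'numberOfThreads' nodes, folded into a 2D grid once the
// block count exceeds 512.
std::pair<dim3, dim3> calcGridDimensions(unsigned int size_Mat, int numberOfThreads)
{
    int grid = (size_Mat / numberOfThreads) + 1;
    int grid1, grid2;
    if (grid > 512) {
        grid1 = 512;
        grid2 = (grid / grid1) + 1;
    } else {
        grid1 = 1;
        grid2 = grid;
    }
    return { dim3(grid1, grid2), dim3(numberOfThreads, 1, 1) };
}
```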
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17/CumulantK17Comp.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17/CumulantK17Comp.cu
index 1caf0340c11cfc86214781ad89e2c6318730b5a6..2b3b72a6888e62ccac1009d2f1ece14b96bf93be 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17/CumulantK17Comp.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17/CumulantK17Comp.cu
@@ -2,6 +2,7 @@
 
 #include "Parameter/Parameter.h"
 #include "CumulantK17Comp_Device.cuh"
+#include "cuda/CudaGrid.h"
 
 std::shared_ptr<CumulantK17Comp> CumulantK17Comp::getNewInstance(std::shared_ptr<Parameter> para, int level)
 {
@@ -10,11 +11,7 @@ std::shared_ptr<CumulantK17Comp> CumulantK17Comp::getNewInstance(std::shared_ptr
 
 void CumulantK17Comp::run()
 {
-    dim3 grid, threads;
-    std::tie(grid, threads) =
-        *calcGridDimensions(para->getParD(level)->size_Mat_SP, para->getParD(level)->numberofthreads);
-
-    LB_Kernel_CumulantK17Comp <<< grid, threads >>>(para->getParD(level)->omega,
+    LB_Kernel_CumulantK17Comp <<< cudaGrid.grid, cudaGrid.threads >>>(para->getParD(level)->omega,
                                                     para->getParD(level)->geoSP,
                                                     para->getParD(level)->neighborX_SP,
                                                     para->getParD(level)->neighborY_SP,
@@ -28,12 +25,9 @@ void CumulantK17Comp::run()
     getLastCudaError("LB_Kernel_CumulantK17Comp execution failed");
 }
 
-CumulantK17Comp::CumulantK17Comp(std::shared_ptr<Parameter> para, int level)
+CumulantK17Comp::CumulantK17Comp(std::shared_ptr<Parameter> para, int level): KernelImp(para, level)
 {
-    this->para = para;
-    this->level = level;
-    myPreProcessorTypes.push_back(InitCompSP27);
-    myKernelGroup = BasicKernel;
+    this->cudaGrid = vf::cuda::CudaGrid(para->getParD(level)->numberofthreads, para->getParD(level)->size_Mat_SP);
 }
\ No newline at end of file
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17chim/CumulantK17CompChim.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17chim/CumulantK17CompChim.cu
index a91008421d37643869dde3c7b398e12fd8778b63..09db3da401edbb4be2a3e3409dac9138c6fad4ad 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17chim/CumulantK17CompChim.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17chim/CumulantK17CompChim.cu
@@ -2,6 +2,7 @@
 
 #include "Parameter/Parameter.h"
 #include "CumulantK17CompChim_Device.cuh"
+#include "cuda/CudaGrid.h"
 
 std::shared_ptr<CumulantK17CompChim> CumulantK17CompChim::getNewInstance(std::shared_ptr<Parameter> para, int level)
 {
@@ -10,11 +11,7 @@ std::shared_ptr<CumulantK17CompChim> CumulantK17CompChim::getNewInstance(std::sh
 
 void CumulantK17CompChim::run()
 {
-    dim3 grid, threads;
-    std::tie(grid, threads) =
-        *calcGridDimensions(para->getParD(level)->size_Mat_SP, para->getParD(level)->numberofthreads);
-
-    LB_Kernel_CumulantK17CompChim <<< grid, threads >>>(
+    LB_Kernel_CumulantK17CompChim <<< cudaGrid.grid, cudaGrid.threads >>>(
         para->getParD(level)->omega,
         para->getParD(level)->geoSP,
         para->getParD(level)->neighborX_SP,
@@ -33,12 +30,9 @@ void CumulantK17CompChim::run()
     getLastCudaError("LB_Kernel_CumulantK17CompChim execution failed");
 }
 
-CumulantK17CompChim::CumulantK17CompChim(std::shared_ptr<Parameter> para, int level)
+CumulantK17CompChim::CumulantK17CompChim(std::shared_ptr<Parameter> para, int level): KernelImp(para, level)
 {
-    this->para = para;
-    this->level = level;
-    myPreProcessorTypes.push_back(InitCompSP27);
-    myKernelGroup = BasicKernel;
+    this->cudaGrid = vf::cuda::CudaGrid(para->getParD(level)->numberofthreads, para->getParD(level)->size_Mat_SP);
 }
\ No newline at end of file
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17chimStream/CumulantK17CompChimStream.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17chimStream/CumulantK17CompChimStream.cu
index 1f8538d5c39df3c7f40c77a90c664da0ae14af32..255452dbc016fdc732277e17f9736d3713db719a 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17chimStream/CumulantK17CompChimStream.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17chimStream/CumulantK17CompChimStream.cu
@@ -14,11 +14,7 @@ std::shared_ptr<CumulantK17CompChimStream> CumulantK17CompChimStream::getNewInst
 
 void CumulantK17CompChimStream::run()
 {
-    dim3 grid, threads;
-    std::tie(grid, threads) =
-        *calcGridDimensions(para->getParD(level)->numberOfFluidNodes, para->getParD(level)->numberofthreads);
-
-    LB_Kernel_CumulantK17CompChimStream <<< grid, threads >>>(
+    LB_Kernel_CumulantK17CompChimStream <<< cudaGrid.grid, cudaGrid.threads >>>(
         para->getParD(level)->omega,
         para->getParD(level)->neighborX_SP,
         para->getParD(level)->neighborY_SP,
@@ -36,13 +32,9 @@ void CumulantK17CompChimStream::run()
 
 void CumulantK17CompChimStream::runOnIndices(const unsigned int *indices, unsigned int size_indices, int streamIndex)
 {
-    dim3 grid, threads;
-    std::tie(grid, threads) =
-        *calcGridDimensions(para->getParD(level)->numberOfFluidNodes, para->getParD(level)->numberofthreads);
-
     cudaStream_t stream = (streamIndex == -1) ? CU_STREAM_LEGACY : para->getStreamManager()->getStream(streamIndex);
 
-    LB_Kernel_CumulantK17CompChimStream<<<grid, threads, 0, stream>>>(
+    LB_Kernel_CumulantK17CompChimStream<<< cudaGrid.grid, cudaGrid.threads, 0, stream>>>(
         para->getParD(level)->omega,
         para->getParD(level)->neighborX_SP,
         para->getParD(level)->neighborY_SP,
@@ -59,13 +51,10 @@ void CumulantK17CompChimStream::runOnIndices(const unsigned int *indices, unsign
 }
 
-CumulantK17CompChimStream::CumulantK17CompChimStream(std::shared_ptr<Parameter> para, int level)
+CumulantK17CompChimStream::CumulantK17CompChimStream(std::shared_ptr<Parameter> para, int level): KernelImp(para, level)
 {
-    this->para = para;
-    this->level = level;
-    myPreProcessorTypes.push_back(InitCompSP27);
-    myKernelGroup = BasicKernel;
+    this->cudaGrid = vf::cuda::CudaGrid(para->getParD(level)->numberofthreads, para->getParD(level)->numberOfFluidNodes);
 }
diff --git a/src/gpu/VirtualFluids_GPU/Parameter/CudaStreamManager.cpp b/src/gpu/VirtualFluids_GPU/Parameter/CudaStreamManager.cpp
index 4ed2118198466438d734ba7877db0b55447f4f3e..3731836f336d91c1bc4cc5f1a8f5ea0a10bee0a6 100644
--- a/src/gpu/VirtualFluids_GPU/Parameter/CudaStreamManager.cpp
+++ b/src/gpu/VirtualFluids_GPU/Parameter/CudaStreamManager.cpp
@@ -31,10 +31,6 @@
 #include <helper_cuda.h>
 #include <iostream>
 
-CudaStreamManager::CudaStreamManager() {}
-
-CudaStreamManager::~CudaStreamManager() {}
-
 void CudaStreamManager::launchStreams(uint numberOfStreams)
 {
     cudaStreams.resize(numberOfStreams);
diff --git a/src/gpu/VirtualFluids_GPU/Parameter/CudaStreamManager.h b/src/gpu/VirtualFluids_GPU/Parameter/CudaStreamManager.h
index d78bd05a2d7e2a474a40ab3368d8a6b7c73d1bde..c2d515ab5fe9c24388632a7ca9e1e4c78b7f1467 100644
--- a/src/gpu/VirtualFluids_GPU/Parameter/CudaStreamManager.h
+++ b/src/gpu/VirtualFluids_GPU/Parameter/CudaStreamManager.h
@@ -44,9 +44,6 @@ private:
     const int bulkStreamIndex = 0;
 
 public:
-    CudaStreamManager();
-    ~CudaStreamManager();
-
     void launchStreams(uint numberOfStreams);
     void terminateStreams();
    cudaStream_t &getStream(uint streamIndex);
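With the empty constructor and destructor removed, CudaStreamManager relies on the compiler-generated ones. A hedged usage sketch of its public interface, assembled only from the declarations touched above (launchStreams, getStream, terminateStreams); the surrounding setup, the include root, and the stream count are assumptions:

```cpp
#include <cuda_runtime.h>
#include "Parameter/CudaStreamManager.h" // path relative to an assumed include root

void runWithManagedStreams()
{
    CudaStreamManager streamManager; // compiler-generated constructor now suffices
    streamManager.launchStreams(2);  // e.g. one border and one bulk stream

    cudaStream_t &stream = streamManager.getStream(0);
    // ... enqueue kernels with <<<grid, threads, 0, stream>>> on this stream ...
    cudaStreamSynchronize(stream);

    streamManager.terminateStreams();
}
```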