From 23d9332b882573868e40ce2fcaad03c2ac97a58a Mon Sep 17 00:00:00 2001 From: HenrikAsmuth <henrik.asmuth@geo.uu.se> Date: Wed, 12 Oct 2022 11:51:46 +0200 Subject: [PATCH] Restructure call of collision kernel with enum CollisionTemplate --- .../Calculation/CollisisionStrategy.cpp | 20 ++++-- .../Calculation/UpdateGrid27.cpp | 7 ++- .../Calculation/UpdateGrid27.h | 2 +- src/gpu/VirtualFluids_GPU/Kernel/Kernel.h | 4 +- .../VirtualFluids_GPU/Kernel/KernelImp.cpp | 4 +- src/gpu/VirtualFluids_GPU/Kernel/KernelImp.h | 4 +- .../CumulantK17Almighty.cu | 61 +++++++++++-------- .../CumulantK17Almighty/CumulantK17Almighty.h | 2 +- src/gpu/VirtualFluids_GPU/LBM/LB.h | 20 ++++-- 9 files changed, 81 insertions(+), 43 deletions(-) diff --git a/src/gpu/VirtualFluids_GPU/Calculation/CollisisionStrategy.cpp b/src/gpu/VirtualFluids_GPU/Calculation/CollisisionStrategy.cpp index d8120d8a0..773d6d2aa 100644 --- a/src/gpu/VirtualFluids_GPU/Calculation/CollisisionStrategy.cpp +++ b/src/gpu/VirtualFluids_GPU/Calculation/CollisisionStrategy.cpp @@ -39,8 +39,10 @@ void CollisionAndExchange_noStreams_indexKernel::operator()(UpdateGrid27 *update //! //! 1. run collision //! - updateGrid->collisionUsingIndices(level, t, para->getParD(level)->fluidNodeIndices, - para->getParD(level)->numberOfFluidNodes, -1); + updateGrid->collisionUsingIndices( level, t, + para->getParD(level)->fluidNodeIndices, + para->getParD(level)->numberOfFluidNodes, + CollisionTemplate::Default, -1); //! 2. exchange information between GPUs updateGrid->exchangeMultiGPU_noStreams_withPrepare(level, false); @@ -68,8 +70,11 @@ void CollisionAndExchange_streams::operator()(UpdateGrid27 *updateGrid, Paramete //! //! 1. run collision for nodes which are at the border of the gpus/processes //! - updateGrid->collisionUsingIndices(level, t, para->getParD(level)->fluidNodeIndicesBorder, - para->getParD(level)->numberOfFluidNodesBorder, borderStreamIndex); + updateGrid->collisionUsingIndices( level, t, + para->getParD(level)->fluidNodeIndicesBorder, + para->getParD(level)->numberOfFluidNodesBorder, + CollisionTemplate::Default, + borderStreamIndex); //! 2. prepare the exchange between gpus (collect the send nodes for communication in a buffer on the gpu) and trigger bulk kernel execution when finished //! @@ -80,8 +85,11 @@ void CollisionAndExchange_streams::operator()(UpdateGrid27 *updateGrid, Paramete //! 3. launch the collision kernel for bulk nodes //! para->getStreamManager()->waitOnStartBulkKernelEvent(bulkStreamIndex); - updateGrid->collisionUsingIndices(level, t, para->getParD(level)->fluidNodeIndices, - para->getParD(level)->numberOfFluidNodes, bulkStreamIndex); + updateGrid->collisionUsingIndices( level, t, + para->getParD(level)->fluidNodeIndices, + para->getParD(level)->numberOfFluidNodes, + CollisionTemplate::Default, + bulkStreamIndex); //! 4. exchange information between GPUs updateGrid->exchangeMultiGPU(level, borderStreamIndex); diff --git a/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.cpp b/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.cpp index 92c167686..eaec7dbbc 100644 --- a/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.cpp +++ b/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.cpp @@ -71,15 +71,16 @@ void UpdateGrid27::collisionAllNodes(int level, unsigned int t) collisionAdvectionDiffusion(level); } -void UpdateGrid27::collisionUsingIndices(int level, unsigned int t, uint *fluidNodeIndices, uint numberOfFluidNodes, int stream) +void UpdateGrid27::collisionUsingIndices(int level, unsigned int t, uint *indices, uint numberOfIndices, CollisionTemplate collisionTemplate, int stream) { - if (fluidNodeIndices != nullptr && numberOfFluidNodes != 0) - kernels.at(level)->runOnIndices(fluidNodeIndices, numberOfFluidNodes, false, false, stream); + if (indices != nullptr && numberOfIndices != 0) + kernels.at(level)->runOnIndices(indices, numberOfIndices, collisionTemplate, stream); else std::cout << "In collision: fluidNodeIndices or numberOfFluidNodes not definded" << std::endl; ////////////////////////////////////////////////////////////////////////// + //! \todo: AD collision and porousMedia should be called separately, not in collisionUsingIndices if (para->getSimulatePorousMedia()) collisionPorousMedia(level); diff --git a/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.h b/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.h index 4106161c9..424533e66 100644 --- a/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.h +++ b/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.h @@ -31,7 +31,7 @@ public: private: void collisionAllNodes(int level, unsigned int t); - void collisionUsingIndices(int level, unsigned int t, uint *fluidNodeIndices = nullptr, uint numberOfFluidNodes = 0, int stream = -1); + void collisionUsingIndices(int level, unsigned int t, uint *indices, uint numberOfIndices, CollisionTemplate collisionTemplate, int stream = -1); void collisionAdvectionDiffusion(int level); void postCollisionBC(int level); diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernel.h b/src/gpu/VirtualFluids_GPU/Kernel/Kernel.h index 7222eeaba..3062eb4d4 100644 --- a/src/gpu/VirtualFluids_GPU/Kernel/Kernel.h +++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernel.h @@ -3,6 +3,8 @@ #include <vector> +#include "LBM/LB.h" + #include "Kernel/Utilities/KernelGroup.h" #include "PreProcessor/PreProcessorType.h" @@ -13,7 +15,7 @@ class Kernel public: virtual ~Kernel() = default; virtual void run() = 0; - virtual void runOnIndices(const unsigned int *indices, unsigned int size_indices, bool writeMacroscopicVariables, bool applyBodyForce, int stream = -1) = 0; //if stream == -1: run on default stream + virtual void runOnIndices(const unsigned int *indices, unsigned int size_indices, CollisionTemplate collisionTemplate, int stream = -1) = 0; //if stream == -1: run on default stream virtual bool checkParameter() = 0; virtual std::vector<PreProcessorType> getPreProcessorTypes() = 0; diff --git a/src/gpu/VirtualFluids_GPU/Kernel/KernelImp.cpp b/src/gpu/VirtualFluids_GPU/Kernel/KernelImp.cpp index c272d6fdc..1798ff139 100644 --- a/src/gpu/VirtualFluids_GPU/Kernel/KernelImp.cpp +++ b/src/gpu/VirtualFluids_GPU/Kernel/KernelImp.cpp @@ -1,9 +1,11 @@ #include "KernelImp.h" +#include "LBM/LB.h" + #include "Kernel/Utilities/CheckParameterStrategy/CheckParameterStrategy.h" -void KernelImp::runOnIndices(const unsigned int *indices, unsigned int size_indices, bool writeMacroscopicVariables, bool applyBodyForce, int stream) +void KernelImp::runOnIndices(const unsigned int *indices, unsigned int size_indices, CollisionTemplate collisionTemplate, int stream) { printf("Method not implemented for this Kernel \n"); } diff --git a/src/gpu/VirtualFluids_GPU/Kernel/KernelImp.h b/src/gpu/VirtualFluids_GPU/Kernel/KernelImp.h index d0c4d8c83..93f56f26c 100644 --- a/src/gpu/VirtualFluids_GPU/Kernel/KernelImp.h +++ b/src/gpu/VirtualFluids_GPU/Kernel/KernelImp.h @@ -1,6 +1,8 @@ #ifndef KERNEL_IMP_H #define KERNEL_IMP_H +#include "LBM/LB.h" + #include "Kernel.h" #include <memory> @@ -14,7 +16,7 @@ class KernelImp : public Kernel { public: virtual void run() = 0; - virtual void runOnIndices(const unsigned int *indices, unsigned int size_indices, bool writeMacroscopicVariables, bool applyBodyForce, int stream = -1); + virtual void runOnIndices(const unsigned int *indices, unsigned int size_indices, CollisionTemplate collisionTemplate, int stream = -1); bool checkParameter(); std::vector<PreProcessorType> getPreProcessorTypes(); diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17Almighty/CumulantK17Almighty.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17Almighty/CumulantK17Almighty.cu index 57b7632e5..64db26341 100644 --- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17Almighty/CumulantK17Almighty.cu +++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17Almighty/CumulantK17Almighty.cu @@ -37,12 +37,34 @@ void CumulantK17Almighty<turbulenceModel>::run() } template<TurbulenceModel turbulenceModel> -void CumulantK17Almighty<turbulenceModel>::runOnIndices( const unsigned int *indices, unsigned int size_indices, bool writeMacroscopicVariables, bool applyBodyForce, int streamIndex ) +void CumulantK17Almighty<turbulenceModel>::runOnIndices( const unsigned int *indices, unsigned int size_indices, CollisionTemplate collisionTemplate, int streamIndex ) { cudaStream_t stream = (streamIndex == -1) ? CU_STREAM_LEGACY : para->getStreamManager()->getStream(streamIndex); - if( !writeMacroscopicVariables && !applyBodyForce){ - LB_Kernel_CumulantK17Almighty < turbulenceModel, false, false > <<< cudaGrid.grid, cudaGrid.threads, 0, stream >>>( para->getParD(level)->omega, + switch (collisionTemplate) + { + case CollisionTemplate::Default: + LB_Kernel_CumulantK17Almighty < turbulenceModel, false, false > <<< cudaGrid.grid, cudaGrid.threads, 0, stream >>>( para->getParD(level)->omega, + para->getParD(level)->typeOfGridNode, + para->getParD(level)->neighborX, para->getParD(level)->neighborY, para->getParD(level)->neighborZ, + para->getParD(level)->distributions.f[0], + para->getParD(level)->rho, + para->getParD(level)->velocityX, para->getParD(level)->velocityY, para->getParD(level)->velocityZ, + para->getParD(level)->turbViscosity, + para->getSGSConstant(), + (unsigned long)para->getParD(level)->numberOfNodes, + level, + para->getIsBodyForce(), + para->getForcesDev(), + para->getParD(level)->forceX_SP, para->getParD(level)->forceY_SP, para->getParD(level)->forceZ_SP, + para->getQuadricLimitersDev(), + para->getParD(level)->isEvenTimestep, + indices, + size_indices); + break; + + case CollisionTemplate::WriteMacroVars: + LB_Kernel_CumulantK17Almighty < turbulenceModel, true, false > <<< cudaGrid.grid, cudaGrid.threads, 0, stream >>>( para->getParD(level)->omega, para->getParD(level)->typeOfGridNode, para->getParD(level)->neighborX, para->getParD(level)->neighborY, para->getParD(level)->neighborZ, para->getParD(level)->distributions.f[0], @@ -59,8 +81,10 @@ void CumulantK17Almighty<turbulenceModel>::runOnIndices( const unsigned int *ind para->getParD(level)->isEvenTimestep, indices, size_indices); - } else if ( writeMacroscopicVariables && !applyBodyForce ){ - LB_Kernel_CumulantK17Almighty < turbulenceModel, true, false > <<< cudaGrid.grid, cudaGrid.threads, 0, stream >>>( para->getParD(level)->omega, + break; + + case CollisionTemplate::AllFeatures: + LB_Kernel_CumulantK17Almighty < turbulenceModel, true, true > <<< cudaGrid.grid, cudaGrid.threads, 0, stream >>>( para->getParD(level)->omega, para->getParD(level)->typeOfGridNode, para->getParD(level)->neighborX, para->getParD(level)->neighborY, para->getParD(level)->neighborZ, para->getParD(level)->distributions.f[0], @@ -77,26 +101,9 @@ void CumulantK17Almighty<turbulenceModel>::runOnIndices( const unsigned int *ind para->getParD(level)->isEvenTimestep, indices, size_indices); - } else if ( writeMacroscopicVariables && applyBodyForce ){ - LB_Kernel_CumulantK17Almighty < turbulenceModel, true, true > <<< cudaGrid.grid, cudaGrid.threads, 0, stream >>>( para->getParD(level)->omega, - para->getParD(level)->typeOfGridNode, - para->getParD(level)->neighborX, para->getParD(level)->neighborY, para->getParD(level)->neighborZ, - para->getParD(level)->distributions.f[0], - para->getParD(level)->rho, - para->getParD(level)->velocityX, para->getParD(level)->velocityY, para->getParD(level)->velocityZ, - para->getParD(level)->turbViscosity, - para->getSGSConstant(), - (unsigned long)para->getParD(level)->numberOfNodes, - level, - para->getIsBodyForce(), - para->getForcesDev(), - para->getParD(level)->forceX_SP, para->getParD(level)->forceY_SP, para->getParD(level)->forceZ_SP, - para->getQuadricLimitersDev(), - para->getParD(level)->isEvenTimestep, - indices, - size_indices); - } else { - LB_Kernel_CumulantK17Almighty < turbulenceModel, false, true > <<< cudaGrid.grid, cudaGrid.threads, 0, stream >>>( para->getParD(level)->omega, + break; + case CollisionTemplate::ApplyBodyForce: + LB_Kernel_CumulantK17Almighty < turbulenceModel, false, true > <<< cudaGrid.grid, cudaGrid.threads, 0, stream >>>( para->getParD(level)->omega, para->getParD(level)->typeOfGridNode, para->getParD(level)->neighborX, para->getParD(level)->neighborY, para->getParD(level)->neighborZ, para->getParD(level)->distributions.f[0], @@ -113,6 +120,10 @@ void CumulantK17Almighty<turbulenceModel>::runOnIndices( const unsigned int *ind para->getParD(level)->isEvenTimestep, indices, size_indices); + break; + default: + throw std::runtime_error("Invalid CollisionTemplate in CumulantK17Almighty::runOnIndices()"); + break; } getLastCudaError("LB_Kernel_CumulantK17Almighty execution failed"); diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17Almighty/CumulantK17Almighty.h b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17Almighty/CumulantK17Almighty.h index c855afaaf..b49b34e63 100644 --- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17Almighty/CumulantK17Almighty.h +++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17Almighty/CumulantK17Almighty.h @@ -10,7 +10,7 @@ class CumulantK17Almighty : public KernelImp public: static std::shared_ptr< CumulantK17Almighty<turbulenceModel> > getNewInstance(std::shared_ptr< Parameter> para, int level); void run() override; - void runOnIndices(const unsigned int *indices, unsigned int size_indices, bool writeMacroscopicVariables, bool applyBodyForce, int stream = -1) override; + void runOnIndices(const unsigned int *indices, unsigned int size_indices, CollisionTemplate collisionTemplate, int streamIndex = -1) override; private: CumulantK17Almighty(); diff --git a/src/gpu/VirtualFluids_GPU/LBM/LB.h b/src/gpu/VirtualFluids_GPU/LBM/LB.h index 813b4ccb0..0b93abecb 100644 --- a/src/gpu/VirtualFluids_GPU/LBM/LB.h +++ b/src/gpu/VirtualFluids_GPU/LBM/LB.h @@ -53,15 +53,27 @@ //! \brief An enumeration for selecting a turbulence model enum class TurbulenceModel { //! - Smagorinsky - Smagorinsky, + Smagorinsky, //! - AMD (Anisotropic Minimum Dissipation) model, see e.g. Rozema et al., Phys. Fluids 27, 085107 (2015), https://doi.org/10.1063/1.4928700 - AMD, + AMD, //! - QR model by Verstappen - QR, + QR, //! - TODO: move the WALE model here from the old kernels //WALE //! - No turbulence model - None + None +}; + +//! \brief An enumeration for selecting a template of the collision kernel (CumulantK17Almighty) +enum class CollisionTemplate { + //! - Default: plain collision without additional read/write + Default, + //! - WriteMacroVars: write out macroscopic variables in the collision kernel + WriteMacroVars, + //! - ApplyBodyForce: read and apply body force in the collision kernel + ApplyBodyForce, + //! - AllFeatures: write out macroscopic variables AND read and apply body force + AllFeatures }; struct InitCondition -- GitLab