Skip to content
Snippets Groups Projects
Commit d02e2d26 authored by Henrik Asmuth's avatar Henrik Asmuth
Browse files

add specialised run function for turbVisc cumulant kernel

parent 8f190580
No related branches found
No related tags found
1 merge request: !170 "Kernel templatization and efficiency improvements"
Showing
with 185 additions and 2 deletions
...@@ -32,4 +32,10 @@ public: ...@@ -32,4 +32,10 @@ public:
void operator()(UpdateGrid27 *updateGrid, Parameter *para, int level, unsigned int t); void operator()(UpdateGrid27 *updateGrid, Parameter *para, int level, unsigned int t);
}; };
//! \brief experimental version for specialized collision kernel calls with different read/write options
//! \details Function object: its call operator runs UpdateGrid27::collisionWithReadWriteFlags for one level
//! and afterwards triggers the multi-GPU exchange.
class CollisionAndExchange_noStreams_withReadWriteFlags
{
public:
    // The call operator has to be public: members of a `class` are private by default,
    // which would make the functor impossible to invoke from outside.
    //! \param updateGrid grid updater whose collision and exchange routines are called
    //! \param para       simulation parameters; the per-level index lists are read from it
    //! \param level      grid level to process
    //! \param t          time step, forwarded to the collision routine
    void operator()(UpdateGrid27 *updateGrid, Parameter *para, int level, unsigned int t);
};
#endif #endif
...@@ -86,3 +86,20 @@ void CollisionAndExchange_streams::operator()(UpdateGrid27 *updateGrid, Paramete ...@@ -86,3 +86,20 @@ void CollisionAndExchange_streams::operator()(UpdateGrid27 *updateGrid, Paramete
//! 4. exchange information between GPUs //! 4. exchange information between GPUs
updateGrid->exchangeMultiGPU(level, borderStreamIndex); updateGrid->exchangeMultiGPU(level, borderStreamIndex);
} }
//! \brief Collision with read/write flags on one level, followed by the exchange between GPUs.
//! \param updateGrid grid updater that performs the collision and the exchange
//! \param para       simulation parameters providing the per-level index lists and their sizes
//! \param level      grid level to process
//! \param t          time step, forwarded unchanged to the collision routine
void CollisionAndExchange_noStreams_withReadWriteFlags::operator()(UpdateGrid27 *updateGrid, Parameter *para, int level,
                                                                    unsigned int t)
{
    //! \details steps:
    //!
    //! 1. run collision
    //!
    // All four index lists (and their lengths) come from the device-side parameters of this level.
    const auto levelParameter = para->getParD(level);
    updateGrid->collisionWithReadWriteFlags(
        level, t,
        levelParameter->fluidNodeIndices,
        levelParameter->numberOfFluidNodes,
        levelParameter->indicesWithMacroscopicVariableOutput,
        levelParameter->numberOfIndicesWithMacroscopicVariableOutput,
        levelParameter->indicesWithApplyBodyForce,
        levelParameter->numberOfIndicesWithApplyBodyForce,
        levelParameter->indicesWithMacroscopicVariableOutputAndApplyBodyForce,
        levelParameter->numberOfIndicesWithMacroscopicVariableOutputAndApplyBodyForce);

    //! 2. exchange information between GPUs
    updateGrid->exchangeMultiGPU_noStreams_withPrepare(level, false);
}
\ No newline at end of file
...@@ -90,6 +90,37 @@ void UpdateGrid27::collisionUsingIndices(int level, unsigned int t, uint *fluidN ...@@ -90,6 +90,37 @@ void UpdateGrid27::collisionUsingIndices(int level, unsigned int t, uint *fluidN
collisionAdvectionDiffusion(level); collisionAdvectionDiffusion(level);
} }
//! \brief Run the collision kernel of one level on the first usable index list, then the optional
//! porous-media and advection-diffusion collisions.
//! \details The index lists are checked in parameter order; the first one that is non-null and non-empty
//! selects the kernel variant that is launched. If none qualifies, a message is printed and no kernel runs.
//! \param level grid level whose kernel is used
//! \param t     time step (currently not used inside this function)
//! \param fluidNodeIndices, numberOfFluidNodes index list for Kernel::runOnIndices
//! \param indicesWithMacroscopicVariableOutput, numberOfIndicesWithMacroscopicVariableOutput
//!        index list for Kernel::runOnIndicesWithMacroscopicVariableOutput
//! \param indicesWithApplyBodyForce, numberOfIndicesWithApplyBodyForce
//!        index list for Kernel::runOnIndicesWithApplyBodyForce
//! \param indicesWithMacroscopicVariableOutputAndApplyBodyForce, numberOfIndicesWithMacroscopicVariableOutputAndApplyBodyForce
//!        index list for Kernel::runOnIndicesWithMacroscopicVariableOutputAndApplyBodyForce
void UpdateGrid27::collisionWithReadWriteFlags(int level, unsigned int t, uint *fluidNodeIndices, uint numberOfFluidNodes,
                                               uint *indicesWithMacroscopicVariableOutput, uint numberOfIndicesWithMacroscopicVariableOutput,
                                               uint *indicesWithApplyBodyForce, uint numberOfIndicesWithApplyBodyForce,
                                               uint *indicesWithMacroscopicVariableOutputAndApplyBodyForce, uint numberOfIndicesWithMacroscopicVariableOutputAndApplyBodyForce)
{
    // NOTE(review): this is an else-if chain, so at most ONE index list is processed per call.
    // If the lists are meant to be disjoint subsets that all need a collision, these should be
    // independent `if`s - confirm the intended semantics before changing.
    // NOTE(review): the stream indices 1..4 are hard-coded; verify that the stream manager provides them.
    if (fluidNodeIndices != nullptr && numberOfFluidNodes != 0)
        kernels.at(level)->runOnIndices(fluidNodeIndices, numberOfFluidNodes, 1);
    else if (indicesWithMacroscopicVariableOutput != nullptr && numberOfIndicesWithMacroscopicVariableOutput != 0)
        kernels.at(level)->runOnIndicesWithMacroscopicVariableOutput(indicesWithMacroscopicVariableOutput, numberOfIndicesWithMacroscopicVariableOutput, 2);
    else if (indicesWithApplyBodyForce != nullptr && numberOfIndicesWithApplyBodyForce != 0)
        // Pass the body-force list that was just checked (previously the macroscopic-output list was passed here).
        kernels.at(level)->runOnIndicesWithApplyBodyForce(indicesWithApplyBodyForce, numberOfIndicesWithApplyBodyForce, 3);
    else if (indicesWithMacroscopicVariableOutputAndApplyBodyForce != nullptr && numberOfIndicesWithMacroscopicVariableOutputAndApplyBodyForce != 0)
        kernels.at(level)->runOnIndicesWithMacroscopicVariableOutputAndApplyBodyForce(indicesWithMacroscopicVariableOutputAndApplyBodyForce, numberOfIndicesWithMacroscopicVariableOutputAndApplyBodyForce, 4);
    else
        std::cout << "In collision: fluidNodeIndices or numberOfFluidNodes not definded" << std::endl;

    //////////////////////////////////////////////////////////////////////////

    if (para->getSimulatePorousMedia())
        collisionPorousMedia(level);

    //////////////////////////////////////////////////////////////////////////

    if (para->getDiffOn())
        collisionAdvectionDiffusion(level);
}
void UpdateGrid27::collisionPorousMedia(int level) void UpdateGrid27::collisionPorousMedia(int level)
{ {
for( std::size_t i = 0; i < pm.size(); i++ ) for( std::size_t i = 0; i < pm.size(); i++ )
......
...@@ -32,6 +32,12 @@ public: ...@@ -32,6 +32,12 @@ public:
private: private:
void collisionAllNodes(int level, unsigned int t); void collisionAllNodes(int level, unsigned int t);
void collisionUsingIndices(int level, unsigned int t, uint *fluidNodeIndices = nullptr, uint numberOfFluidNodes = 0, int stream = -1); void collisionUsingIndices(int level, unsigned int t, uint *fluidNodeIndices = nullptr, uint numberOfFluidNodes = 0, int stream = -1);
//! \brief Collision on one level using specialised kernel calls selected by index list.
//! \details Launches the level's kernel on the first index list that is non-null and non-empty
//! (checked in parameter order), then runs the porous-media and advection-diffusion collisions
//! if they are enabled in the parameters. The time step \p t is currently not used by the implementation.
//! Each pointer parameter is paired with the element count that follows it.
void collisionWithReadWriteFlags(int level, unsigned int t,
uint *fluidNodeIndices, uint numberOfFluidNodes,
uint *indicesWithMacroscopicVariableOutput, uint numberOfIndicesWithMacroscopicVariableOutput,
uint *indicesWithApplyBodyForce, uint numberOfIndicesWithApplyBodyForce,
uint *indicesWithMacroscopicVariableOutputAndApplyBodyForce, uint numberOfIndicesWithMacroscopicVariableOutputAndApplyBodyForce);
void collisionAdvectionDiffusion(int level); void collisionAdvectionDiffusion(int level);
void postCollisionBC(int level); void postCollisionBC(int level);
...@@ -60,6 +66,7 @@ private: ...@@ -60,6 +66,7 @@ private:
friend class CollisionAndExchange_noStreams_indexKernel; friend class CollisionAndExchange_noStreams_indexKernel;
friend class CollisionAndExchange_noStreams_oldKernel; friend class CollisionAndExchange_noStreams_oldKernel;
friend class CollisionAndExchange_streams; friend class CollisionAndExchange_streams;
friend class CollisionAndExchange_noStreams_withReadWriteFlags;
RefinementStrategy refinement; RefinementStrategy refinement;
friend class RefinementAndExchange_streams_exchangeInterface; friend class RefinementAndExchange_streams_exchangeInterface;
......
...@@ -14,6 +14,9 @@ public: ...@@ -14,6 +14,9 @@ public:
virtual ~Kernel() = default; virtual ~Kernel() = default;
virtual void run() = 0; virtual void run() = 0;
virtual void runOnIndices(const unsigned int *indices, unsigned int size_indices, int stream = -1) = 0; //if stream == -1: run on default stream virtual void runOnIndices(const unsigned int *indices, unsigned int size_indices, int stream = -1) = 0; //if stream == -1: run on default stream
//! \brief Specialised variants of runOnIndices; each takes an index list, its length and a stream index.
//! NOTE(review): judging by the names, the variants differ in whether macroscopic variables are written
//! and/or a body force is applied - confirm against the kernel implementations.
//! NOTE(review): default arguments on virtual functions are bound statically; keep the defaults identical in all overrides.
virtual void runOnIndicesWithMacroscopicVariableOutput(const unsigned int *indices, unsigned int size_indices, int stream = -1) = 0;
virtual void runOnIndicesWithApplyBodyForce(const unsigned int *indices, unsigned int size_indices, int stream = -1) = 0;
virtual void runOnIndicesWithMacroscopicVariableOutputAndApplyBodyForce(const unsigned int *indices, unsigned int size_indices, int stream = -1) = 0;
virtual bool checkParameter() = 0; virtual bool checkParameter() = 0;
virtual std::vector<PreProcessorType> getPreProcessorTypes() = 0; virtual std::vector<PreProcessorType> getPreProcessorTypes() = 0;
......
...@@ -8,6 +8,21 @@ void KernelImp::runOnIndices(const unsigned int *indices, unsigned int size_indi ...@@ -8,6 +8,21 @@ void KernelImp::runOnIndices(const unsigned int *indices, unsigned int size_indi
printf("Method not implemented for this Kernel \n"); printf("Method not implemented for this Kernel \n");
} }
//! \brief Fallback for kernels that do not provide this variant: only reports that it is missing.
//! \details None of the arguments is used; a fixed message is written to stdout and the call returns.
void KernelImp::runOnIndicesWithMacroscopicVariableOutput(const unsigned int *indices, unsigned int size_indices, int stream)
{
    fputs("Method not implemented for this Kernel \n", stdout);
}
//! \brief Fallback for kernels that do not provide this variant: only reports that it is missing.
//! \details None of the arguments is used; a fixed message is written to stdout and the call returns.
void KernelImp::runOnIndicesWithApplyBodyForce(const unsigned int *indices, unsigned int size_indices, int stream)
{
    fputs("Method not implemented for this Kernel \n", stdout);
}
//! \brief Fallback for kernels that do not provide this variant: only reports that it is missing.
//! \details None of the arguments is used; a fixed message is written to stdout and the call returns.
void KernelImp::runOnIndicesWithMacroscopicVariableOutputAndApplyBodyForce(const unsigned int *indices, unsigned int size_indices, int stream)
{
    fputs("Method not implemented for this Kernel \n", stdout);
}
bool KernelImp::checkParameter() { bool KernelImp::checkParameter() {
return checkStrategy->checkParameter(para); return checkStrategy->checkParameter(para);
} }
......
...@@ -15,6 +15,9 @@ class KernelImp : public Kernel ...@@ -15,6 +15,9 @@ class KernelImp : public Kernel
public: public:
virtual void run() = 0; virtual void run() = 0;
virtual void runOnIndices(const unsigned int *indices, unsigned int size_indices, int stream = -1); virtual void runOnIndices(const unsigned int *indices, unsigned int size_indices, int stream = -1);
//! \brief Default implementations of the specialised index-based runs.
//! \details The KernelImp versions only print "Method not implemented for this Kernel";
//! kernels that support a variant provide their own implementation.
virtual void runOnIndicesWithMacroscopicVariableOutput(const unsigned int *indices, unsigned int size_indices, int stream = -1);
virtual void runOnIndicesWithApplyBodyForce(const unsigned int *indices, unsigned int size_indices, int stream = -1);
virtual void runOnIndicesWithMacroscopicVariableOutputAndApplyBodyForce(const unsigned int *indices, unsigned int size_indices, int stream = -1);
bool checkParameter(); bool checkParameter();
std::vector<PreProcessorType> getPreProcessorTypes(); std::vector<PreProcessorType> getPreProcessorTypes();
......
...@@ -43,7 +43,7 @@ void TurbulentViscosityCumulantK17CompChim<turbulenceModel>::run() ...@@ -43,7 +43,7 @@ void TurbulentViscosityCumulantK17CompChim<turbulenceModel>::run()
} }
template<TurbulenceModel turbulenceModel> template<TurbulenceModel turbulenceModel>
void TurbulentViscosityCumulantK17CompChim<turbulenceModel>::runOnIndices(const unsigned int *indices, unsigned int size_indices, int streamIndex) void TurbulentViscosityCumulantK17CompChim<turbulenceModel>::runOnIndices( const unsigned int *indices, unsigned int size_indices, int streamIndex )
{ {
cudaStream_t stream = (streamIndex == -1) ? CU_STREAM_LEGACY : para->getStreamManager()->getStream(streamIndex); cudaStream_t stream = (streamIndex == -1) ? CU_STREAM_LEGACY : para->getStreamManager()->getStream(streamIndex);
...@@ -73,6 +73,99 @@ void TurbulentViscosityCumulantK17CompChim<turbulenceModel>::runOnIndices(const ...@@ -73,6 +73,99 @@ void TurbulentViscosityCumulantK17CompChim<turbulenceModel>::runOnIndices(const
getLastCudaError("LB_Kernel_TurbulentViscosityCumulantK17CompChim execution failed"); getLastCudaError("LB_Kernel_TurbulentViscosityCumulantK17CompChim execution failed");
} }
//! \brief Launch LB_Kernel_TurbulentViscosityCumulantK17CompChim<turbulenceModel, true, false> on a list of node indices.
//! \details NOTE(review): the two boolean template arguments presumably enable macroscopic variable output (true)
//! and body-force application (false) - confirm against the kernel declaration.
//! \param indices      node indices handed to the kernel
//! \param size_indices number of entries in \p indices
//! \param streamIndex  -1 selects CU_STREAM_LEGACY, any other value is looked up in the stream manager
template<TurbulenceModel turbulenceModel>
void TurbulentViscosityCumulantK17CompChim<turbulenceModel>::runOnIndicesWithMacroscopicVariableOutput( const unsigned int *indices, unsigned int size_indices, int streamIndex)
{
    cudaStream_t stream = CU_STREAM_LEGACY;
    if (streamIndex != -1)
        stream = para->getStreamManager()->getStream(streamIndex);

    // Device-side parameters of this kernel's level, fetched once for all kernel arguments.
    const auto levelPara = para->getParD(level);

    LB_Kernel_TurbulentViscosityCumulantK17CompChim< turbulenceModel, true, false ><<< cudaGrid.grid, cudaGrid.threads, 0, stream >>>(
        levelPara->omega,
        levelPara->typeOfGridNode,
        levelPara->neighborX,
        levelPara->neighborY,
        levelPara->neighborZ,
        levelPara->distributions.f[0],
        levelPara->rho,
        levelPara->velocityX,
        levelPara->velocityY,
        levelPara->velocityZ,
        levelPara->turbViscosity,
        para->getSGSConstant(),
        static_cast<unsigned long>(levelPara->numberOfNodes),
        level,
        para->getIsBodyForce(),
        para->getForcesDev(),
        levelPara->forceX_SP,
        levelPara->forceY_SP,
        levelPara->forceZ_SP,
        para->getQuadricLimitersDev(),
        levelPara->isEvenTimestep,
        indices,
        size_indices);

    getLastCudaError("LB_Kernel_TurbulentViscosityCumulantK17CompChim execution failed");
}
//! \brief Launch LB_Kernel_TurbulentViscosityCumulantK17CompChim<turbulenceModel, false, true> on a list of node indices.
//! \details NOTE(review): the two boolean template arguments presumably disable macroscopic variable output (false)
//! and enable body-force application (true) - confirm against the kernel declaration.
//! \param indices      node indices handed to the kernel
//! \param size_indices number of entries in \p indices
//! \param streamIndex  -1 selects CU_STREAM_LEGACY, any other value is looked up in the stream manager
template<TurbulenceModel turbulenceModel>
void TurbulentViscosityCumulantK17CompChim<turbulenceModel>::runOnIndicesWithApplyBodyForce( const unsigned int *indices, unsigned int size_indices, int streamIndex)
{
    cudaStream_t stream = CU_STREAM_LEGACY;
    if (streamIndex != -1)
        stream = para->getStreamManager()->getStream(streamIndex);

    // Device-side parameters of this kernel's level, fetched once for all kernel arguments.
    const auto levelPara = para->getParD(level);

    LB_Kernel_TurbulentViscosityCumulantK17CompChim< turbulenceModel, false, true ><<< cudaGrid.grid, cudaGrid.threads, 0, stream >>>(
        levelPara->omega,
        levelPara->typeOfGridNode,
        levelPara->neighborX,
        levelPara->neighborY,
        levelPara->neighborZ,
        levelPara->distributions.f[0],
        levelPara->rho,
        levelPara->velocityX,
        levelPara->velocityY,
        levelPara->velocityZ,
        levelPara->turbViscosity,
        para->getSGSConstant(),
        static_cast<unsigned long>(levelPara->numberOfNodes),
        level,
        para->getIsBodyForce(),
        para->getForcesDev(),
        levelPara->forceX_SP,
        levelPara->forceY_SP,
        levelPara->forceZ_SP,
        para->getQuadricLimitersDev(),
        levelPara->isEvenTimestep,
        indices,
        size_indices);

    getLastCudaError("LB_Kernel_TurbulentViscosityCumulantK17CompChim execution failed");
}
//! \brief Launch LB_Kernel_TurbulentViscosityCumulantK17CompChim<turbulenceModel, true, true> on a list of node indices.
//! \details NOTE(review): the two boolean template arguments presumably enable both macroscopic variable output
//! and body-force application - confirm against the kernel declaration.
//! \param indices      node indices handed to the kernel
//! \param size_indices number of entries in \p indices
//! \param streamIndex  -1 selects CU_STREAM_LEGACY, any other value is looked up in the stream manager
template<TurbulenceModel turbulenceModel>
void TurbulentViscosityCumulantK17CompChim<turbulenceModel>::runOnIndicesWithMacroscopicVariableOutputAndApplyBodyForce( const unsigned int *indices, unsigned int size_indices, int streamIndex)
{
    cudaStream_t stream = CU_STREAM_LEGACY;
    if (streamIndex != -1)
        stream = para->getStreamManager()->getStream(streamIndex);

    // Device-side parameters of this kernel's level, fetched once for all kernel arguments.
    const auto levelPara = para->getParD(level);

    LB_Kernel_TurbulentViscosityCumulantK17CompChim< turbulenceModel, true, true ><<< cudaGrid.grid, cudaGrid.threads, 0, stream >>>(
        levelPara->omega,
        levelPara->typeOfGridNode,
        levelPara->neighborX,
        levelPara->neighborY,
        levelPara->neighborZ,
        levelPara->distributions.f[0],
        levelPara->rho,
        levelPara->velocityX,
        levelPara->velocityY,
        levelPara->velocityZ,
        levelPara->turbViscosity,
        para->getSGSConstant(),
        static_cast<unsigned long>(levelPara->numberOfNodes),
        level,
        para->getIsBodyForce(),
        para->getForcesDev(),
        levelPara->forceX_SP,
        levelPara->forceY_SP,
        levelPara->forceZ_SP,
        para->getQuadricLimitersDev(),
        levelPara->isEvenTimestep,
        indices,
        size_indices);

    getLastCudaError("LB_Kernel_TurbulentViscosityCumulantK17CompChim execution failed");
}
template<TurbulenceModel turbulenceModel> template<TurbulenceModel turbulenceModel>
TurbulentViscosityCumulantK17CompChim<turbulenceModel>::TurbulentViscosityCumulantK17CompChim(std::shared_ptr<Parameter> para, int level) TurbulentViscosityCumulantK17CompChim<turbulenceModel>::TurbulentViscosityCumulantK17CompChim(std::shared_ptr<Parameter> para, int level)
{ {
......
...@@ -11,7 +11,9 @@ public: ...@@ -11,7 +11,9 @@ public:
static std::shared_ptr< TurbulentViscosityCumulantK17CompChim<turbulenceModel> > getNewInstance(std::shared_ptr< Parameter> para, int level); static std::shared_ptr< TurbulentViscosityCumulantK17CompChim<turbulenceModel> > getNewInstance(std::shared_ptr< Parameter> para, int level);
void run() override; void run() override;
void runOnIndices(const unsigned int *indices, unsigned int size_indices, int stream = -1) override; void runOnIndices(const unsigned int *indices, unsigned int size_indices, int stream = -1) override;
//! \brief Specialised index-based runs; each launches the kernel with a different pair of boolean template flags.
//! \param indices      node indices handed to the kernel
//! \param size_indices number of entries in \p indices
//! \param streamIndex  -1 selects CU_STREAM_LEGACY, any other value is looked up in the stream manager
void runOnIndicesWithMacroscopicVariableOutput( const unsigned int *indices, unsigned int size_indices, int streamIndex = -1) override;
void runOnIndicesWithApplyBodyForce( const unsigned int *indices, unsigned int size_indices, int streamIndex = -1) override;
void runOnIndicesWithMacroscopicVariableOutputAndApplyBodyForce( const unsigned int *indices, unsigned int size_indices, int streamIndex = -1) override;
private: private:
TurbulentViscosityCumulantK17CompChim(); TurbulentViscosityCumulantK17CompChim();
TurbulentViscosityCumulantK17CompChim(std::shared_ptr<Parameter> para, int level); TurbulentViscosityCumulantK17CompChim(std::shared_ptr<Parameter> para, int level);
......
...@@ -372,6 +372,12 @@ struct LBMSimulationParameter { ...@@ -372,6 +372,12 @@ struct LBMSimulationParameter {
uint numberOfFluidNodes; uint numberOfFluidNodes;
uint *fluidNodeIndicesBorder; uint *fluidNodeIndicesBorder;
uint numberOfFluidNodesBorder; uint numberOfFluidNodesBorder;
// Index lists (pointer + element count) consumed by the specialised collision kernel calls.
// NOTE(review): presumably each list holds the nodes that need the named read/write behaviour - confirm where they are filled.
// NOTE(review): no default initialisers here; the collision code tests these against nullptr / 0,
// so make sure they are set (or zero-initialised) before use.
uint *indicesWithMacroscopicVariableOutput;
uint numberOfIndicesWithMacroscopicVariableOutput;
uint *indicesWithApplyBodyForce;
uint numberOfIndicesWithApplyBodyForce;
uint *indicesWithMacroscopicVariableOutputAndApplyBodyForce;
uint numberOfIndicesWithMacroscopicVariableOutputAndApplyBodyForce;
}; };
//! \brief Class for LBM-parameter management //! \brief Class for LBM-parameter management
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment