diff --git a/src/gpu/GridGenerator/grid/Grid.h b/src/gpu/GridGenerator/grid/Grid.h index 32f57179182b069c3f2804e71e60824e831a4102..d4621562a56e33e632d8e2a9c92dd40fa49f3bbf 100644 --- a/src/gpu/GridGenerator/grid/Grid.h +++ b/src/gpu/GridGenerator/grid/Grid.h @@ -143,7 +143,7 @@ public: // needed for CUDA Streams virtual void findFluidNodeIndices(bool onlyBulk) = 0; - virtual uint getNumberOfFluidNodes() const = 0;; + virtual uint getNumberOfFluidNodes() const = 0; virtual void getFluidNodeIndices(uint *fluidNodeIndices) const = 0; virtual uint getNumberOfFluidNodesBorder() const = 0; diff --git a/src/gpu/GridGenerator/grid/GridImp.cu b/src/gpu/GridGenerator/grid/GridImp.cu index ee54a8b0298a8b01fd923f70e98e47ed0d25eea8..3cb31261714ef6d7b0915bd2049b0db8d4fbf088 100644 --- a/src/gpu/GridGenerator/grid/GridImp.cu +++ b/src/gpu/GridGenerator/grid/GridImp.cu @@ -1974,7 +1974,7 @@ CUDA_HOST void GridImp::getFluidNodeIndices(uint *fluidNodeIndices) const uint GridImp::getNumberOfFluidNodesBorder() const { - return this->fluidNodeIndicesBorder.size(); + return (uint)this->fluidNodeIndicesBorder.size(); } void GridImp::getFluidNodeIndicesBorder(uint *fluidNodeIndicesBorder) const diff --git a/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.cpp b/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.cpp index 445244ac761ed7dfa49f9fcaa385987e31955fa0..9d3bd9030b5f82e100d5d913906b60b257df2e4e 100644 --- a/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.cpp +++ b/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.cpp @@ -27,10 +27,10 @@ void updateGrid27(Parameter* para, ////////////////////////////////////////////////////////////////////////// if (para->useStreams) { - collision(para, pm, level, t, kernels, para->getParD(level)->fluidNodeIndices, - para->getParD(level)->numberOfFluidNodes); - collision(para, pm, level, t, kernels, para->getParD(level)->fluidNodeIndicesBorder, - para->getParD(level)->numberOffluidNodesBorder); + collisionUsingIndex(para, pm, level, t, kernels, para->getParD(level)->fluidNodeIndices, + para->getParD(level)->numberOfFluidNodes, 0); + collisionUsingIndex(para, pm, level, t, kernels, para->getParD(level)->fluidNodeIndicesBorder, + para->getParD(level)->numberOffluidNodesBorder, 1); } else collision(para, pm, level, t, kernels); @@ -68,15 +68,30 @@ void updateGrid27(Parameter* para, } } -void collision(Parameter* para, std::vector<std::shared_ptr<PorousMedia>>& pm, int level, unsigned int t, std::vector < SPtr< Kernel>>& kernels, uint* fluidNodeIndices, uint numberOfFluidNodes) +void collision(Parameter* para, std::vector<std::shared_ptr<PorousMedia>>& pm, int level, unsigned int t, std::vector < SPtr< Kernel>>& kernels) { - if (para->useStreams) - if (fluidNodeIndices != nullptr && numberOfFluidNodes != 0) - kernels.at(level)->runOnIndices(fluidNodeIndices, numberOfFluidNodes); - else - std::cout << "in collision: fluidNodeIndices or numberOfFluidNodes not definded" << std::endl; // better use logger + kernels.at(level)->run(); + + ////////////////////////////////////////////////////////////////////////// + + if (para->getSimulatePorousMedia()) + collisionPorousMedia(para, pm, level); + + ////////////////////////////////////////////////////////////////////////// + + if (para->getDiffOn()) + collisionAdvectionDiffusion(para, level); +} + +void collisionUsingIndex(Parameter *para, std::vector<std::shared_ptr<PorousMedia>> &pm, int level, unsigned int t, + std::vector<SPtr<Kernel>> &kernels, + uint *fluidNodeIndices, uint numberOfFluidNodes, int stream) +{ + if (fluidNodeIndices != nullptr && numberOfFluidNodes != 0) + kernels.at(level)->runOnIndices(fluidNodeIndices, numberOfFluidNodes, stream); else - kernels.at(level)->run(); + std::cout << "in collision: fluidNodeIndices or numberOfFluidNodes not definded" + << std::endl; ////////////////////////////////////////////////////////////////////////// diff --git a/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.h b/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.h index 0ce9383b9dc30e9dcdca4586f6d06c2593dbf027..44b02d36fa4a428e3b0ba299df73430ef491ca7a 100644 --- a/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.h +++ b/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.h @@ -18,7 +18,9 @@ extern "C" void updateGrid27(Parameter* para, unsigned int t, std::vector < SPtr< Kernel>>& kernels); -extern "C" void collision(Parameter *para, std::vector<std::shared_ptr<PorousMedia>> &pm, int level, unsigned int t, std::vector<SPtr<Kernel>> &kernels, uint *fluidNodeIndices = nullptr, uint numberOfFluidNodes = 0); +extern "C" void collision(Parameter *para, std::vector<std::shared_ptr<PorousMedia>> &pm, int level, unsigned int t, std::vector<SPtr<Kernel>> &kernels); + +extern "C" void collisionUsingIndex(Parameter *para, std::vector<std::shared_ptr<PorousMedia>> &pm, int level, unsigned int t, std::vector<SPtr<Kernel>> &kernels, uint *fluidNodeIndices = nullptr, uint numberOfFluidNodes = 0, int stream = -1); extern "C" void collisionPorousMedia(Parameter* para, std::vector<std::shared_ptr<PorousMedia>>& pm, int level); diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernel.h b/src/gpu/VirtualFluids_GPU/Kernel/Kernel.h index ceb1638adbfb3abf38dae61714a69356b0bb8361..9f9f7539bc5a1e28612d956ca32234c5a3589f8a 100644 --- a/src/gpu/VirtualFluids_GPU/Kernel/Kernel.h +++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernel.h @@ -13,7 +13,7 @@ class Kernel public: virtual ~Kernel() = default; virtual void run() = 0; - virtual void runOnIndices(const unsigned int *indices, unsigned int size_indices) = 0; + virtual void runOnIndices(const unsigned int *indices, unsigned int size_indices, int stream = -1) = 0; //if stream == -1: run on default stream virtual bool checkParameter() = 0; virtual std::vector<PreProcessorType> getPreProcessorTypes() = 0; diff --git a/src/gpu/VirtualFluids_GPU/Kernel/KernelImp.cpp b/src/gpu/VirtualFluids_GPU/Kernel/KernelImp.cpp index cd06a7c94b2ffce4dcbb20d4f564fa82a56321f2..3151e6bedeb6a96666f11f0040de2c95b20cc42c 100644 --- a/src/gpu/VirtualFluids_GPU/Kernel/KernelImp.cpp +++ b/src/gpu/VirtualFluids_GPU/Kernel/KernelImp.cpp @@ -3,7 +3,7 @@ #include "Kernel/Utilities/CheckParameterStrategy/CheckParameterStrategy.h" -void KernelImp::runOnIndices(const unsigned int *indices, unsigned int size_indices) +void KernelImp::runOnIndices(const unsigned int *indices, unsigned int size_indices, int stream) { printf("Method not implemented for this Kernel \n"); } diff --git a/src/gpu/VirtualFluids_GPU/Kernel/KernelImp.h b/src/gpu/VirtualFluids_GPU/Kernel/KernelImp.h index 0986e7b9759a69cf6df6c9624a3e67fcb8716100..c5215dbdd00e41a76b49c6c67808a6e7c44bb9a8 100644 --- a/src/gpu/VirtualFluids_GPU/Kernel/KernelImp.h +++ b/src/gpu/VirtualFluids_GPU/Kernel/KernelImp.h @@ -14,7 +14,7 @@ class KernelImp : public Kernel { public: virtual void run() = 0; - virtual void runOnIndices(const unsigned int *indices, unsigned int size_indices); + virtual void runOnIndices(const unsigned int *indices, unsigned int size_indices, int stream = -1); bool checkParameter(); std::vector<PreProcessorType> getPreProcessorTypes(); diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17chimSparse/CumulantK17CompChimSparse.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17chimSparse/CumulantK17CompChimSparse.cu index 6c3555c8f74762de4f9d57053fe0ec10c959893d..afad3118124851461132cef17a4f8a5801ddc28d 100644 --- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17chimSparse/CumulantK17CompChimSparse.cu +++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17chimSparse/CumulantK17CompChimSparse.cu @@ -3,6 +3,8 @@ #include "Parameter/Parameter.h" #include "CumulantK17CompChimSparse_Device.cuh" +#include <cuda.h> + std::shared_ptr<CumulantK17CompChimSparse> CumulantK17CompChimSparse::getNewInstance(std::shared_ptr<Parameter> para, int level) { @@ -30,25 +32,32 @@ void CumulantK17CompChimSparse::run() getLastCudaError("LB_Kernel_CumulantK17CompChim execution failed"); } -void CumulantK17CompChimSparse::runOnIndices(const unsigned int *indices, unsigned int size_indices) +void CumulantK17CompChimSparse::runOnIndices(const unsigned int *indices, unsigned int size_indices, int streamIndex) { dim3 grid, threads; std::tie(grid, threads) = *calcGridDimensions(para->getParD(level)->numberOfFluidNodes); - LB_Kernel_CumulantK17CompChimSparse<<<grid, threads, 0, para->getStream(0)>>>( + cudaStream_t stream; + if (streamIndex == -1) + stream = CU_STREAM_LEGACY; + else + stream = para->getStream(streamIndex); + + LB_Kernel_CumulantK17CompChimSparse<<<grid, threads, 0, stream>>>( para->getParD(level)->omega, - para->getParD(level)->neighborX_SP, - para->getParD(level)->neighborY_SP, + para->getParD(level)->neighborX_SP, + para->getParD(level)->neighborY_SP, para->getParD(level)->neighborZ_SP, - para->getParD(level)->d0SP.f[0], - para->getParD(level)->size_Mat_SP, - level, + para->getParD(level)->d0SP.f[0], + para->getParD(level)->size_Mat_SP, + level, para->getForcesDev(), - para->getQuadricLimitersDev(), - para->getParD(level)->evenOrOdd, + para->getQuadricLimitersDev(), + para->getParD(level)->evenOrOdd, indices, - size_indices); + size_indices); getLastCudaError("LB_Kernel_CumulantK17CompChim execution failed"); + } CumulantK17CompChimSparse::CumulantK17CompChimSparse(std::shared_ptr<Parameter> para, int level) diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17chimSparse/CumulantK17CompChimSparse.h b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17chimSparse/CumulantK17CompChimSparse.h index f1645cfaec3ca28b0856bffc0ecb45319746216e..906c880337d1747f96bc7a0f5d5b7988d6235ee5 100644 --- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17chimSparse/CumulantK17CompChimSparse.h +++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17chimSparse/CumulantK17CompChimSparse.h @@ -7,8 +7,8 @@ class CumulantK17CompChimSparse : public KernelImp { public: static std::shared_ptr<CumulantK17CompChimSparse> getNewInstance(std::shared_ptr<Parameter> para, int level); - void run(); - void runOnIndices(const unsigned int *indices, unsigned int size_indices) override; + void run() override; + void runOnIndices(const unsigned int *indices, unsigned int size_indices, int stream = -1) override; private: CumulantK17CompChimSparse(); diff --git a/src/gpu/VirtualFluids_GPU/LBM/Simulation.cpp b/src/gpu/VirtualFluids_GPU/LBM/Simulation.cpp index 34c45493d7ce7e8606c1e96a97e770ba57d1e152..9bbfa66b6f1132a66146bba833ba7c5a5c18d041 100644 --- a/src/gpu/VirtualFluids_GPU/LBM/Simulation.cpp +++ b/src/gpu/VirtualFluids_GPU/LBM/Simulation.cpp @@ -106,7 +106,7 @@ void Simulation::init(SPtr<Parameter> para, SPtr<GridProvider> gridProvider, std ////////////////////////////////////////////////////////////////////////// // CUDA streams if(para->useStreams) - para->launchStreams((uint)1); + para->launchStreams((uint)2); ////////////////////////////////////////////////////////////////////////// // //output << para->getNeedInterface().at(0) << "\n"; diff --git a/src/gpu/VirtualFluids_GPU/Parameter/Parameter.h b/src/gpu/VirtualFluids_GPU/Parameter/Parameter.h index 9f70266e4656863ca18fed53afe30196410c5d40..5d24abe4f812e9a898a32c32fcef9aa759b87593 100644 --- a/src/gpu/VirtualFluids_GPU/Parameter/Parameter.h +++ b/src/gpu/VirtualFluids_GPU/Parameter/Parameter.h @@ -43,8 +43,8 @@ #include "VirtualFluids_GPU_export.h" +#include <cuda.h> #include <cuda_runtime.h> -#include <helper_cuda.h> struct curandStateXORWOW; typedef struct curandStateXORWOW curandState;