diff --git a/apps/gpu/LBM/BoundaryLayer/BoundaryLayer.cpp b/apps/gpu/LBM/BoundaryLayer/BoundaryLayer.cpp index 7d2d79d8c21f1966a5b2369f2649df2efc3b4bcb..7ec203e78ebfe8958c87882b91d91049527b45c5 100644 --- a/apps/gpu/LBM/BoundaryLayer/BoundaryLayer.cpp +++ b/apps/gpu/LBM/BoundaryLayer/BoundaryLayer.cpp @@ -150,10 +150,10 @@ void multipleLevel(const std::string& configPath) } const bool readPrecursor = config.getValue("readPrecursor", false); - int nTReadPrecursor; + int timestepsBetweenReadsPrecursor; if(readPrecursor) { - nTReadPrecursor = config.getValue<int>("nTimestepsReadPrecursor"); + timestepsBetweenReadsPrecursor = config.getValue<int>("nTimestepsReadPrecursor"); precursorDirectory = config.getValue<std::string>("precursorDirectory"); useDistributions = config.getValue<bool>("useDistributions", false); } @@ -235,14 +235,19 @@ void multipleLevel(const std::string& configPath) bool isLastSubDomain = (procID == nProcs-1 && nProcs > 1)? true: false; bool isMidSubDomain = (!isFirstSubDomain && !isLastSubDomain && nProcs > 1)? true: false; - if(isFirstSubDomain || isMidSubDomain) + if(isFirstSubDomain) { xGridMax += overlap; - // xGridMin -= overlap; + if(!readPrecursor) xGridMin -= overlap; } - if(isLastSubDomain || isMidSubDomain) + if(isLastSubDomain) { - // xGridMax += overlap; + xGridMin -= overlap; + if(!readPrecursor) xGridMax += overlap; + } + if(isMidSubDomain) + { + xGridMax += overlap; xGridMin -= overlap; } @@ -252,8 +257,7 @@ void multipleLevel(const std::string& configPath) { gridBuilder->setNumberOfLayers(4,0); real xMaxRefinement = readPrecursor? 
xGridMax-H: xGridMax; //Stop refinement some distance before outlet if domain ist not periodic - // gridBuilder->addGrid( new Cuboid( xGridMin+dx, 0.f, 0.f, xMaxRefinement, L_y, 0.5*L_z) , 1 ); - gridBuilder->addGrid( new Cuboid( 0.f, 0.f, 0.f, 5000.0, L_y, 0.5*L_z) , 1 ); + gridBuilder->addGrid( new Cuboid( xGridMin, 0.f, 0.f, xMaxRefinement, L_y, 0.5*L_z) , 1 ); para->setMaxLevel(2); scalingFactory.setScalingFactory(GridScalingFactory::GridScaling::ScaleCompressible); } @@ -302,9 +306,9 @@ void multipleLevel(const std::string& configPath) { if(isFirstSubDomain || nProcs == 1) { - // auto precursor = createFileCollection(precursorDirectory + "/precursor", FileType::VTK); - // gridBuilder->setPrecursorBoundaryCondition(SideType::MX, precursor, nTReadPrecursor); - gridBuilder->setVelocityBoundaryCondition(SideType::MX, velocityLB, 0.0, 0.0); + auto precursor = createFileCollection(precursorDirectory + "/precursor", FileType::VTK); + gridBuilder->setPrecursorBoundaryCondition(SideType::MX, precursor, timestepsBetweenReadsPrecursor); + // gridBuilder->setVelocityBoundaryCondition(SideType::MX, velocityLB, 0.0, 0.0); } if(isLastSubDomain || nProcs == 1) diff --git a/src/gpu/GridGenerator/TransientBCSetter/TransientBCSetter.cpp b/src/gpu/GridGenerator/TransientBCSetter/TransientBCSetter.cpp index 7c2c41bb8af72422ac0289a9c2ab3784adb5772d..8b3fca3e374f4ca08ab3a3291a49a242b0b2ff70 100644 --- a/src/gpu/GridGenerator/TransientBCSetter/TransientBCSetter.cpp +++ b/src/gpu/GridGenerator/TransientBCSetter/TransientBCSetter.cpp @@ -156,7 +156,7 @@ void VTKFile::unloadFile() this->loaded = false; } -void VTKFile::getData(real* data, uint numberOfNodes, std::vector<uint> readIndeces, std::vector<uint> writeIndices, uint offsetRead, uint offsetWrite) +void VTKFile::getData(real* data, uint numberOfNodes, const std::vector<uint>& readIndices, const std::vector<uint>& writeIndices, uint offsetRead, uint offsetWrite) { if(!this->loaded) loadFile(); @@ -167,7 +167,7 @@ void 
VTKFile::getData(real* data, uint numberOfNodes, std::vector<uint> readInde real* quant = &data[j*numberOfNodes]; for(size_t i=0; i<nPoints; i++) { - quant[offsetWrite+writeIndices[i]] = this->quantities[j].values[readIndeces[i]+offsetRead]; + quant[offsetWrite+writeIndices[i]] = this->quantities[j].values[readIndices[i]+offsetRead]; } } } diff --git a/src/gpu/GridGenerator/TransientBCSetter/TransientBCSetter.h b/src/gpu/GridGenerator/TransientBCSetter/TransientBCSetter.h index 7b1145185fe0ea1d515127d46cbd66bdbad7f38d..49b085973a39181cf0cfe23d44e818d3061ed4ca 100644 --- a/src/gpu/GridGenerator/TransientBCSetter/TransientBCSetter.h +++ b/src/gpu/GridGenerator/TransientBCSetter/TransientBCSetter.h @@ -40,7 +40,7 @@ public: // printFileInfo(); }; - void getData(real* data, uint numberOfNodes, std::vector<uint> readIndeces, std::vector<uint> writeIndices, uint offsetRead, uint offsetWrite); + void getData(real* data, uint numberOfNodes, const std::vector<uint>& readIndices, const std::vector<uint>& writeIndices, uint offsetRead, uint offsetWrite); bool markNANs(std::vector<uint> readIndices); bool inBoundingBox(real posX, real posY, real posZ){return inXBounds(posX) && inYBounds(posY) && inZBounds(posZ); }; bool inXBounds(real posX){ return posX<=maxX && posX>=minX; }; diff --git a/src/gpu/GridGenerator/grid/BoundaryConditions/BoundaryCondition.h b/src/gpu/GridGenerator/grid/BoundaryConditions/BoundaryCondition.h index 12e1fb61c81948c88febed8960f0a3a9548fbfb4..22342aec9839afad9bb37b1b11812f6d1750ed7b 100644 --- a/src/gpu/GridGenerator/grid/BoundaryConditions/BoundaryCondition.h +++ b/src/gpu/GridGenerator/grid/BoundaryConditions/BoundaryCondition.h @@ -337,9 +337,9 @@ public: class PrecursorBoundaryCondition : public gg::BoundaryCondition { public: - static SPtr<PrecursorBoundaryCondition> make(SPtr<TransientBCInputFileReader> reader, int nTRead, real velocityX, real velocityY, real velocityZ) + static SPtr<PrecursorBoundaryCondition> 
make(SPtr<TransientBCInputFileReader> reader, int timeStepsBetweenReads, real velocityX, real velocityY, real velocityZ) { - return SPtr<PrecursorBoundaryCondition>(new PrecursorBoundaryCondition(reader, nTRead, velocityX, velocityY, velocityZ)); + return SPtr<PrecursorBoundaryCondition>(new PrecursorBoundaryCondition(reader, timeStepsBetweenReads, velocityX, velocityY, velocityZ)); } SPtr<TransientBCInputFileReader> getReader(){ return reader; } @@ -348,13 +348,13 @@ public: real getVelocityZ() { return velocityZ; } private: - PrecursorBoundaryCondition(SPtr<TransientBCInputFileReader> _reader, uint _nTRead, real vx, real vy, real vz) : reader(_reader), nTRead(_nTRead), velocityX(vx), velocityY(vy), velocityZ(vz) { }; + PrecursorBoundaryCondition(SPtr<TransientBCInputFileReader> _reader, uint _timeStepsBetweenReads, real vx, real vy, real vz) : reader(_reader), timeStepsBetweenReads(_timeStepsBetweenReads), velocityX(vx), velocityY(vy), velocityZ(vz) { }; virtual char getType() const override { return vf::gpu::BC_VELOCITY; } public: - uint nTRead; //!> read data every nth timestep + uint timeStepsBetweenReads; //!> read data every nth timestep private: real velocityX = 0.0; diff --git a/src/gpu/GridGenerator/grid/BoundaryConditions/Side.cpp b/src/gpu/GridGenerator/grid/BoundaryConditions/Side.cpp index 270aff9d8ff9639b8ae0c19451ca90990eba9c63..9b430188235845d416e085f55a4586ee4b7c8f20 100644 --- a/src/gpu/GridGenerator/grid/BoundaryConditions/Side.cpp +++ b/src/gpu/GridGenerator/grid/BoundaryConditions/Side.cpp @@ -85,25 +85,6 @@ void Side::addIndices(SPtr<Grid> grid, SPtr<BoundaryCondition> boundaryCondition boundaryCondition->patches.push_back(0); } - // else if(boundaryCondition->getType()==vf::gpu::BC_STRESS && (index != INVALID_INDEX) && ( grid->getFieldEntry(index) == vf::gpu::FLUID - // || grid->getFieldEntry(index) == vf::gpu::FLUID_CFC - // || grid->getFieldEntry(index) == vf::gpu::FLUID_CFF - // || grid->getFieldEntry(index) == vf::gpu::FLUID_FCC - // || 
grid->getFieldEntry(index) == vf::gpu::FLUID_FCF - // || grid->getFieldEntry(index) == vf::gpu::FLUID_FCF - // || grid->getFieldEntry(index) == vf::gpu::BC_PRESSURE - // )) - // { - // grid->setFieldEntry(index, boundaryCondition->getType()); - // boundaryCondition->indices.push_back(index); - // setPressureNeighborIndices(boundaryCondition, grid, index); - // setStressSamplingIndices(boundaryCondition, grid, index); - - // setQs(grid, boundaryCondition, index); - - // boundaryCondition->patches.push_back(0); - // } - } } } diff --git a/src/gpu/GridGenerator/grid/GridBuilder/GridBuilder.h b/src/gpu/GridGenerator/grid/GridBuilder/GridBuilder.h index c9aa7678b9d4391a47707880a2fcb9eb90547864..8c3db0afdf89ceb460b87d6b0b0ccd91db56f428 100644 --- a/src/gpu/GridGenerator/grid/GridBuilder/GridBuilder.h +++ b/src/gpu/GridGenerator/grid/GridBuilder/GridBuilder.h @@ -118,7 +118,7 @@ public: virtual void getPrecursorValues(uint* neighborNT, uint* neighborNB, uint* neighborST, uint* neighborSB, real* weightsNT, real* weightsNB, real* weightsST, real* weightsSB, int* indices, std::vector<SPtr<TransientBCInputFileReader>>& reader, - int& numberOfPrecursorNodes, size_t& numberOfQuantities, uint& nTRead, + int& numberOfPrecursorNodes, size_t& numberOfQuantities, uint& timeStepsBetweenReads, real& velocityX, real& velocityY, real& velocityZ, int level) const = 0; virtual void getPrecursorQs(real* qs[27], int level) const = 0; @@ -147,18 +147,18 @@ public: virtual void findFluidNodes(bool splitDomain) = 0; - virtual void addFluidNodeIndicesMacroVars(std::vector<uint> fluidNodeIndicesMacroVars, uint level) = 0; - virtual void addFluidNodeIndicesApplyBodyForce(std::vector<uint> fluidNodeIndicesApplyBodyForce, uint level) = 0; - virtual void addFluidNodeIndicesAllFeatures(std::vector<uint> fluidNodeIndicesAllFeatures, uint level) = 0; + virtual void addFluidNodeIndicesMacroVars(const std::vector<uint>& fluidNodeIndicesMacroVars, uint level) = 0; + virtual void 
addFluidNodeIndicesApplyBodyForce(const std::vector<uint>& fluidNodeIndicesApplyBodyForce, uint level) = 0; + virtual void addFluidNodeIndicesAllFeatures(const std::vector<uint>& fluidNodeIndicesAllFeatures, uint level) = 0; virtual void sortFluidNodeIndicesMacroVars(uint level) = 0; virtual void sortFluidNodeIndicesApplyBodyForce(uint level) = 0; virtual void sortFluidNodeIndicesAllFeatures(uint level) = 0; - virtual uint getNumberOfFluidNodesMacroVars(unsigned int level) const = 0; - virtual void getFluidNodeIndicesMacroVars(uint *fluidNodeIndicesMacroVars, const int level) const = 0; - virtual uint getNumberOfFluidNodesApplyBodyForce(unsigned int level) const = 0; - virtual void getFluidNodeIndicesApplyBodyForce(uint *fluidNodeIndicesApplyBodyForce, const int level) const = 0; - virtual uint getNumberOfFluidNodesAllFeatures(unsigned int level) const = 0; - virtual void getFluidNodeIndicesAllFeatures(uint *fluidNodeIndicesAllFeatures, const int level) const = 0; + virtual uint getNumberOfFluidNodesMacroVars(uint level) const = 0; + virtual void getFluidNodeIndicesMacroVars(uint *fluidNodeIndicesMacroVars, int level) const = 0; + virtual uint getNumberOfFluidNodesApplyBodyForce(uint level) const = 0; + virtual void getFluidNodeIndicesApplyBodyForce(uint *fluidNodeIndicesApplyBodyForce, int level) const = 0; + virtual uint getNumberOfFluidNodesAllFeatures(uint level) const = 0; + virtual void getFluidNodeIndicesAllFeatures(uint *fluidNodeIndicesAllFeatures, int level) const = 0; }; diff --git a/src/gpu/GridGenerator/grid/GridBuilder/LevelGridBuilder.cpp b/src/gpu/GridGenerator/grid/GridBuilder/LevelGridBuilder.cpp index 267f03c5aefa809f68af6d9750cf4afbf3cfe3e2..0a31b6f910b9d4cc2449cd422a999f70c4485468 100644 --- a/src/gpu/GridGenerator/grid/GridBuilder/LevelGridBuilder.cpp +++ b/src/gpu/GridGenerator/grid/GridBuilder/LevelGridBuilder.cpp @@ -122,7 +122,7 @@ void LevelGridBuilder::setSlipGeometryBoundaryCondition(real normalX, real norma 
//======================================================================================= //! \brief Set stress boundary concdition using iMEM //! \param samplingOffset number of grid points above boundary where velocity for wall model is sampled -//! \param z0 roghness length [m] +//! \param z0 roughness length [m] //! \param dx dx of level 0 [m] //! void LevelGridBuilder::setStressBoundaryCondition( SideType sideType, @@ -251,7 +251,7 @@ void LevelGridBuilder::setNoSlipGeometryBoundaryCondition() } } -void LevelGridBuilder::setPrecursorBoundaryCondition(SideType sideType, SPtr<FileCollection> fileCollection, int nTRead, +void LevelGridBuilder::setPrecursorBoundaryCondition(SideType sideType, SPtr<FileCollection> fileCollection, int timeStepsBetweenReads, real velocityX, real velocityY, real velocityZ, std::vector<uint> fileLevelToGridLevelMap) { if(fileLevelToGridLevelMap.empty()) @@ -271,7 +271,7 @@ void LevelGridBuilder::setPrecursorBoundaryCondition(SideType sideType, SPtr<Fil for (uint level = 0; level < getNumberOfGridLevels(); level++) { auto reader = createReaderForCollection(fileCollection, fileLevelToGridLevelMap[level]); - SPtr<PrecursorBoundaryCondition> precursorBoundaryCondition = PrecursorBoundaryCondition::make( reader, nTRead, velocityX, velocityY, velocityZ); + SPtr<PrecursorBoundaryCondition> precursorBoundaryCondition = PrecursorBoundaryCondition::make( reader, timeStepsBetweenReads, velocityX, velocityY, velocityZ); auto side = SideFactory::make(sideType); @@ -648,20 +648,20 @@ uint LevelGridBuilder::getPrecursorSize(int level) const void LevelGridBuilder::getPrecursorValues( uint* neighborNT, uint* neighborNB, uint* neighborST, uint* neighborSB, real* weightsNT, real* weightsNB, real* weightsST, real* weightsSB, int* indices, std::vector<SPtr<TransientBCInputFileReader>>& reader, - int& numberOfPrecursorNodes, size_t& numberOfQuantities, uint& nTRead, + int& numberOfPrecursorNodes, size_t& numberOfQuantities, uint& timeStepsBetweenReads, 
real& velocityX, real& velocityY, real& velocityZ, int level) const { int allIndicesCounter = 0; int allNodesCounter = 0; - uint tmpNTRead = 0; + uint tmpTimeStepsBetweenReads = 0; size_t tmpNQuantities = 0; for (auto boundaryCondition : boundaryConditions[level]->precursorBoundaryConditions) { - if( tmpNTRead == 0 ) - tmpNTRead = boundaryCondition->nTRead; - if( tmpNTRead != boundaryCondition->nTRead ) - throw std::runtime_error("All precursor boundary conditions must have the same NTRead value"); + if( tmpTimeStepsBetweenReads == 0 ) + tmpTimeStepsBetweenReads = boundaryCondition->timeStepsBetweenReads; + if( tmpTimeStepsBetweenReads != boundaryCondition->timeStepsBetweenReads ) + throw std::runtime_error("All precursor boundary conditions must have the same timeStepsBetweenReads value"); auto BCreader = boundaryCondition->getReader(); BCreader->setWritingOffset(allIndicesCounter); reader.push_back(BCreader); @@ -690,9 +690,9 @@ void LevelGridBuilder::getPrecursorValues( uint* neighborNT, uint* neighborNB, } numberOfPrecursorNodes = allNodesCounter; - if (tmpNTRead == 0) - throw std::runtime_error("NTRead of precursor needs to be larger than 0."); - nTRead = tmpNTRead; + if (tmpTimeStepsBetweenReads == 0) + throw std::runtime_error("timeStepsBetweenReads of precursor needs to be larger than 0."); + timeStepsBetweenReads = tmpTimeStepsBetweenReads; if (tmpNQuantities == 0) throw std::runtime_error("Number of quantities in precursor needs to be larger than 0."); @@ -798,17 +798,17 @@ void LevelGridBuilder::findFluidNodes(bool splitDomain) } -void LevelGridBuilder::addFluidNodeIndicesMacroVars(std::vector<uint> fluidNodeIndicesMacroVars, uint level) +void LevelGridBuilder::addFluidNodeIndicesMacroVars(const std::vector<uint>& fluidNodeIndicesMacroVars, uint level) { grids[level]->addFluidNodeIndicesMacroVars(fluidNodeIndicesMacroVars); } -void LevelGridBuilder::addFluidNodeIndicesApplyBodyForce(std::vector<uint> fluidNodeIndicesApplyBodyForce, uint level) +void 
LevelGridBuilder::addFluidNodeIndicesApplyBodyForce(const std::vector<uint>& fluidNodeIndicesApplyBodyForce, uint level) { grids[level]->addFluidNodeIndicesApplyBodyForce(fluidNodeIndicesApplyBodyForce); } -void LevelGridBuilder::addFluidNodeIndicesAllFeatures(std::vector<uint> fluidNodeIndicesAllFeatures, uint level) +void LevelGridBuilder::addFluidNodeIndicesAllFeatures(const std::vector<uint>& fluidNodeIndicesAllFeatures, uint level) { grids[level]->addFluidNodeIndicesAllFeatures(fluidNodeIndicesAllFeatures); } diff --git a/src/gpu/GridGenerator/grid/GridBuilder/LevelGridBuilder.h b/src/gpu/GridGenerator/grid/GridBuilder/LevelGridBuilder.h index cc6f31800c2e55ca604ff2f223475ba54d3afd56..afaf4118469a3cc935cd862d311629fabd1c0379 100644 --- a/src/gpu/GridGenerator/grid/GridBuilder/LevelGridBuilder.h +++ b/src/gpu/GridGenerator/grid/GridBuilder/LevelGridBuilder.h @@ -84,8 +84,9 @@ public: GRIDGENERATOR_EXPORT void setPressureBoundaryCondition(SideType sideType, real rho); GRIDGENERATOR_EXPORT void setPeriodicBoundaryCondition(bool periodic_X, bool periodic_Y, bool periodic_Z); GRIDGENERATOR_EXPORT void setNoSlipBoundaryCondition(SideType sideType); - GRIDGENERATOR_EXPORT void setPrecursorBoundaryCondition(SideType sideType, SPtr<FileCollection> fileCollection, int nTRead, real velocityX=0.0f, real velocityY=0.0f, real velocityZ=0.0f, - std::vector<uint> fileLevelToGridLevelMap = {}); + GRIDGENERATOR_EXPORT void setPrecursorBoundaryCondition(SideType sideType, SPtr<FileCollection> fileCollection, int timeStepsBetweenReads, + real velocityX=0.0f, real velocityY=0.0f, real velocityZ=0.0f, + std::vector<uint> fileLevelToGridLevelMap = {}); GRIDGENERATOR_EXPORT void setEnableFixRefinementIntoTheWall(bool enableFixRefinementIntoTheWall); @@ -131,7 +132,7 @@ public: GRIDGENERATOR_EXPORT void getPrecursorValues( uint* neighborNT, uint* neighborNB, uint* neighborST, uint* neighborSB, real* weightsNT, real* weightsNB, real* weightsST, real* weightsSB, int* indices, 
std::vector<SPtr<TransientBCInputFileReader>>& reader, - int& numberOfPrecursorNodes, size_t& numberOfQuantities, uint& nTRead, + int& numberOfPrecursorNodes, size_t& numberOfQuantities, uint& timeStepsBetweenReads, real& velocityX, real& velocityY, real& velocityZ, int level) const override; GRIDGENERATOR_EXPORT virtual void getPrecursorQs(real* qs[27], int level) const override; @@ -211,9 +212,9 @@ public: // needed for CUDA Streams MultiGPU (Communication Hiding) void findFluidNodes(bool splitDomain) override; - void addFluidNodeIndicesMacroVars(std::vector<uint> fluidNodeIndicesMacroVars, uint level) override; - void addFluidNodeIndicesApplyBodyForce(std::vector<uint> fluidNodeIndicesApplyBodyForce, uint level) override; - void addFluidNodeIndicesAllFeatures(std::vector<uint> fluidNodeIndicesAllFeatures, uint level) override; + void addFluidNodeIndicesMacroVars(const std::vector<uint>& fluidNodeIndicesMacroVars, uint level) override; + void addFluidNodeIndicesApplyBodyForce(const std::vector<uint>& fluidNodeIndicesApplyBodyForce, uint level) override; + void addFluidNodeIndicesAllFeatures(const std::vector<uint>& fluidNodeIndicesAllFeatures, uint level) override; void sortFluidNodeIndicesMacroVars(uint level) override; void sortFluidNodeIndicesApplyBodyForce(uint level) override; diff --git a/src/gpu/GridGenerator/grid/GridImp.h b/src/gpu/GridGenerator/grid/GridImp.h index 981245c7098d960ebf6f975bdab8cdf1ecbb6e38..8283bf569e266b84f020334a306d93756b01c394 100644 --- a/src/gpu/GridGenerator/grid/GridImp.h +++ b/src/gpu/GridGenerator/grid/GridImp.h @@ -116,7 +116,7 @@ private: int *sparseIndices; std::vector<uint> fluidNodeIndices; // run on CollisionTemplate::Default - std::vector<uint> fluidNodeIndicesBorder; // run on border nodes + std::vector<uint> fluidNodeIndicesBorder; // run on subdomain border nodes (CollisionTemplate::SubDomainBorder) std::vector<uint> fluidNodeIndicesMacroVars; // run on CollisionTemplate::MacroVars std::vector<uint> 
fluidNodeIndicesApplyBodyForce; // run on CollisionTemplate::ApplyBodyForce std::vector<uint> fluidNodeIndicesAllFeatures; // run on CollisionTemplate::AllFeatures diff --git a/src/gpu/VirtualFluids_GPU/Calculation/CollisisionStrategy.cpp b/src/gpu/VirtualFluids_GPU/Calculation/CollisisionStrategy.cpp index 1c279ad523e8442bc68af079f1c789a1b130cd35..49543f37df7fb54290f4ab6c09edb8d10c0b67be 100644 --- a/src/gpu/VirtualFluids_GPU/Calculation/CollisisionStrategy.cpp +++ b/src/gpu/VirtualFluids_GPU/Calculation/CollisisionStrategy.cpp @@ -72,16 +72,16 @@ void CollisionAndExchange_streams::operator()(UpdateGrid27 *updateGrid, Paramete //! 1. run collision for nodes which are at the border of the gpus/processes, running with WriteMacroVars in case probes sample on these nodes //! updateGrid->collisionUsingIndices( level, t, - para->getParD(level)->taggedFluidNodeIndices[CollisionTemplate::Border], - para->getParD(level)->numberOfTaggedFluidNodes[CollisionTemplate::Border], + para->getParD(level)->taggedFluidNodeIndices[CollisionTemplate::SubDomainBorder], + para->getParD(level)->numberOfTaggedFluidNodes[CollisionTemplate::SubDomainBorder], CollisionTemplate::WriteMacroVars, - CudaStreamIndex::Border); + CudaStreamIndex::SubDomainBorder); //! 2. prepare the exchange between gpus (collect the send nodes for communication in a buffer on the gpu) and trigger bulk kernel execution when finished //! - updateGrid->prepareExchangeMultiGPU(level, CudaStreamIndex::Border); + updateGrid->prepareExchangeMultiGPU(level, CudaStreamIndex::SubDomainBorder); if (para->getUseStreams()) - para->getStreamManager()->triggerStartBulkKernel(CudaStreamIndex::Border); + para->getStreamManager()->triggerStartBulkKernel(CudaStreamIndex::SubDomainBorder); //! 3. launch the collision kernel for bulk nodes. This includes nodes with \param tag Default, WriteMacroVars, ApplyBodyForce, //! or AllFeatures. 
All assigned tags are listed in \param allocatedBulkFluidNodeTags during initialization in Simulation::init @@ -97,5 +97,5 @@ void CollisionAndExchange_streams::operator()(UpdateGrid27 *updateGrid, Paramete CudaStreamIndex::Bulk); } //! 4. exchange information between GPUs - updateGrid->exchangeMultiGPU(level, CudaStreamIndex::Border); + updateGrid->exchangeMultiGPU(level, CudaStreamIndex::SubDomainBorder); } diff --git a/src/gpu/VirtualFluids_GPU/Calculation/RefinementStrategy.cpp b/src/gpu/VirtualFluids_GPU/Calculation/RefinementStrategy.cpp index fe7da04d4074a37321f4340f1d76f0606963b89e..b8ca4e9c2020e17cd0192267ac5d931b510afc3a 100644 --- a/src/gpu/VirtualFluids_GPU/Calculation/RefinementStrategy.cpp +++ b/src/gpu/VirtualFluids_GPU/Calculation/RefinementStrategy.cpp @@ -42,27 +42,27 @@ void RefinementAndExchange_streams_exchangeInterface::operator()(UpdateGrid27 *u //! //! 1. Interpolation fine to coarse for nodes which are at the border of the gpus/processes //! - updateGrid->fineToCoarse(level, &para->getParD(level)->intFCBorder, para->getParD(level)->offFC, CudaStreamIndex::Border); + updateGrid->fineToCoarse(level, &para->getParD(level)->intFCBorder, para->getParD(level)->offFC, CudaStreamIndex::SubDomainBorder); //! 2. prepare the exchange between gpus (collect the send nodes for communication in a buffer on the gpu) and trigger bulk kernel execution when finished //! - updateGrid->prepareExchangeMultiGPUAfterFtoC(level, CudaStreamIndex::Border); + updateGrid->prepareExchangeMultiGPUAfterFtoC(level, CudaStreamIndex::SubDomainBorder); if (para->getUseStreams()) - para->getStreamManager()->triggerStartBulkKernel(CudaStreamIndex::Border); + para->getStreamManager()->triggerStartBulkKernel(CudaStreamIndex::SubDomainBorder); //! 3. launch the bulk kernels for both interpolation processes (fine to coarse and coarse to fine) //! 
para->getStreamManager()->waitOnStartBulkKernelEvent(CudaStreamIndex::Bulk); - updateGrid->fineToCoarse(level, &para->getParD(level)->intFCBulk, para->getParD(level)->offFCBulk, CudaStreamIndex::Border); - updateGrid->coarseToFine(level, &para->getParD(level)->intCFBulk, para->getParD(level)->offCFBulk, CudaStreamIndex::Border); + updateGrid->fineToCoarse(level, &para->getParD(level)->intFCBulk, para->getParD(level)->offFCBulk, CudaStreamIndex::SubDomainBorder); + updateGrid->coarseToFine(level, &para->getParD(level)->intCFBulk, para->getParD(level)->offCFBulk, CudaStreamIndex::SubDomainBorder); //! 4. exchange information between GPUs (only nodes which are part of the interpolation) //! - updateGrid->exchangeMultiGPUAfterFtoC(level, CudaStreamIndex::Border); + updateGrid->exchangeMultiGPUAfterFtoC(level, CudaStreamIndex::SubDomainBorder); // 5. interpolation fine to coarse for nodes which are at the border of the gpus/processes //! - updateGrid->coarseToFine(level, &para->getParD(level)->intCFBorder, para->getParD(level)->offCF, CudaStreamIndex::Border); + updateGrid->coarseToFine(level, &para->getParD(level)->intCFBorder, para->getParD(level)->offCF, CudaStreamIndex::SubDomainBorder); cudaDeviceSynchronize(); } @@ -73,27 +73,27 @@ void RefinementAndExchange_streams_exchangeAllNodes::operator()(UpdateGrid27 *up //! //! 1. interpolation fine to coarse for nodes which are at the border of the gpus/processes //! - updateGrid->fineToCoarse(level, &para->getParD(level)->intFCBorder, para->getParD(level)->offFC, CudaStreamIndex::Border); + updateGrid->fineToCoarse(level, &para->getParD(level)->intFCBorder, para->getParD(level)->offFC, CudaStreamIndex::SubDomainBorder); //! 2. prepare the exchange between gpus (collect the send nodes for communication in a buffer on the gpu) and trigger bulk kernel execution when finished //! 
- updateGrid->prepareExchangeMultiGPU(level, CudaStreamIndex::Border); + updateGrid->prepareExchangeMultiGPU(level, CudaStreamIndex::SubDomainBorder); if (para->getUseStreams()) - para->getStreamManager()->triggerStartBulkKernel(CudaStreamIndex::Border); + para->getStreamManager()->triggerStartBulkKernel(CudaStreamIndex::SubDomainBorder); //! 3. launch the bulk kernels for both interpolation processes (fine to coarse and coarse to fine) //! para->getStreamManager()->waitOnStartBulkKernelEvent(CudaStreamIndex::Bulk); - updateGrid->fineToCoarse(level, &para->getParD(level)->intFCBulk, para->getParD(level)->offFCBulk, CudaStreamIndex::Border); - updateGrid->coarseToFine(level, &para->getParD(level)->intCFBulk, para->getParD(level)->offCFBulk, CudaStreamIndex::Border); + updateGrid->fineToCoarse(level, &para->getParD(level)->intFCBulk, para->getParD(level)->offFCBulk, CudaStreamIndex::SubDomainBorder); + updateGrid->coarseToFine(level, &para->getParD(level)->intCFBulk, para->getParD(level)->offCFBulk, CudaStreamIndex::SubDomainBorder); //! 4. exchange information between GPUs (all nodes) //! - updateGrid->exchangeMultiGPU(level, CudaStreamIndex::Border); + updateGrid->exchangeMultiGPU(level, CudaStreamIndex::SubDomainBorder); // 5. interpolation fine to coarse for nodes which are at the border of the gpus/processes //! 
- updateGrid->coarseToFine(level, &para->getParD(level)->intCFBorder, para->getParD(level)->offCF, CudaStreamIndex::Border); + updateGrid->coarseToFine(level, &para->getParD(level)->intCFBorder, para->getParD(level)->offCF, CudaStreamIndex::SubDomainBorder); cudaDeviceSynchronize(); } diff --git a/src/gpu/VirtualFluids_GPU/Communication/ExchangeData27.cpp b/src/gpu/VirtualFluids_GPU/Communication/ExchangeData27.cpp index 44202b71686df31ba9d6e2dd6f6cf18453fd3408..00a7b45668e2050467f3d1122455dc74d0ad4f1c 100644 --- a/src/gpu/VirtualFluids_GPU/Communication/ExchangeData27.cpp +++ b/src/gpu/VirtualFluids_GPU/Communication/ExchangeData27.cpp @@ -240,7 +240,7 @@ void exchangeCollDataYGPU27(Parameter *para, vf::gpu::Communicator &comm, CudaMe std::vector<ProcessNeighbor27> *sendProcessNeighborHost, std::vector<ProcessNeighbor27> *recvProcessNeighborHost) { - cudaStream_t stream = para->getStreamManager()->getStream(CudaStreamIndex::Border); + cudaStream_t stream = para->getStreamManager()->getStream(CudaStreamIndex::SubDomainBorder); /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// // copy Device to Host for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsY(level, "send")); i++) diff --git a/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderGenerator/GridGenerator.cpp b/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderGenerator/GridGenerator.cpp index a89b5a1ca821ce794263f77fcc6772d169879401..c3dd9e41495645d06e54514cc25eb336ed6fc9c5 100644 --- a/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderGenerator/GridGenerator.cpp +++ b/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderGenerator/GridGenerator.cpp @@ -120,11 +120,11 @@ void GridGenerator::allocArrays_taggedFluidNodes() { if(para->getParH(level)->numberOfTaggedFluidNodes[tag]>0) para->getParH(level)->allocatedBulkFluidNodeTags.push_back(tag); break; - case 
CollisionTemplate::Border: - this->setNumberOfTaggedFluidNodes(builder->getNumberOfFluidNodesBorder(level), CollisionTemplate::Border, level); - cudaMemoryManager->cudaAllocTaggedFluidNodeIndices(CollisionTemplate::Border, level); - builder->getFluidNodeIndicesBorder(para->getParH(level)->taggedFluidNodeIndices[CollisionTemplate::Border], level); - cudaMemoryManager->cudaCopyTaggedFluidNodeIndices(CollisionTemplate::Border, level); + case CollisionTemplate::SubDomainBorder: + this->setNumberOfTaggedFluidNodes(builder->getNumberOfFluidNodesBorder(level), CollisionTemplate::SubDomainBorder, level); + cudaMemoryManager->cudaAllocTaggedFluidNodeIndices(CollisionTemplate::SubDomainBorder, level); + builder->getFluidNodeIndicesBorder(para->getParH(level)->taggedFluidNodeIndices[CollisionTemplate::SubDomainBorder], level); + cudaMemoryManager->cudaCopyTaggedFluidNodeIndices(CollisionTemplate::SubDomainBorder, level); break; case CollisionTemplate::WriteMacroVars: this->setNumberOfTaggedFluidNodes(builder->getNumberOfFluidNodesMacroVars(level), CollisionTemplate::WriteMacroVars, level); @@ -157,7 +157,7 @@ void GridGenerator::allocArrays_taggedFluidNodes() { VF_LOG_INFO("Number of tagged nodes on level {}:", level); VF_LOG_INFO("Default: {}, Border: {}, WriteMacroVars: {}, ApplyBodyForce: {}, AllFeatures: {}", para->getParH(level)->numberOfTaggedFluidNodes[CollisionTemplate::Default], - para->getParH(level)->numberOfTaggedFluidNodes[CollisionTemplate::Border], + para->getParH(level)->numberOfTaggedFluidNodes[CollisionTemplate::SubDomainBorder], para->getParH(level)->numberOfTaggedFluidNodes[CollisionTemplate::WriteMacroVars], para->getParH(level)->numberOfTaggedFluidNodes[CollisionTemplate::ApplyBodyForce], para->getParH(level)->numberOfTaggedFluidNodes[CollisionTemplate::AllFeatures] ); @@ -177,8 +177,8 @@ void GridGenerator::tagFluidNodeIndices(std::vector<uint> taggedFluidNodeIndices builder->addFluidNodeIndicesAllFeatures( taggedFluidNodeIndices, level ); break; case 
CollisionTemplate::Default: - case CollisionTemplate::Border: - throw std::runtime_error("Cannot tag fluid nodes as Default or Border!"); + case CollisionTemplate::SubDomainBorder: + throw std::runtime_error("Cannot tag fluid nodes as Default or SubDomainBorder!"); default: throw std::runtime_error("Tagging fluid nodes with invald tag!"); break; @@ -347,13 +347,13 @@ void GridGenerator::allocArrays_BoundaryValues() para->getParH(level)->precursorBC.weightsNT, para->getParH(level)->precursorBC.weightsNB, para->getParH(level)->precursorBC.weightsST, para->getParH(level)->precursorBC.weightsSB, para->getParH(level)->precursorBC.k, para->getParH(level)->transientBCInputFileReader, para->getParH(level)->precursorBC.numberOfPrecursorNodes, - para->getParH(level)->precursorBC.numberOfQuantities, para->getParH(level)->precursorBC.nTRead, + para->getParH(level)->precursorBC.numberOfQuantities, para->getParH(level)->precursorBC.timeStepsBetweenReads, para->getParH(level)->precursorBC.velocityX, para->getParH(level)->precursorBC.velocityY, para->getParH(level)->precursorBC.velocityZ, level); //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// para->getParD(level)->precursorBC.numberOfPrecursorNodes = para->getParH(level)->precursorBC.numberOfPrecursorNodes; para->getParD(level)->precursorBC.numberOfQuantities = para->getParH(level)->precursorBC.numberOfQuantities; - para->getParD(level)->precursorBC.nTRead = para->getParH(level)->precursorBC.nTRead; + para->getParD(level)->precursorBC.timeStepsBetweenReads = para->getParH(level)->precursorBC.timeStepsBetweenReads; para->getParD(level)->precursorBC.velocityX = para->getParH(level)->precursorBC.velocityX; para->getParD(level)->precursorBC.velocityY = para->getParH(level)->precursorBC.velocityY; para->getParD(level)->precursorBC.velocityZ = para->getParH(level)->precursorBC.velocityZ; @@ -380,7 +380,7 @@ void 
GridGenerator::allocArrays_BoundaryValues() para->getParD(level)->precursorBC.next = tmp; //read second timestep of precursor into next and copy next to device - real nextTime = para->getParD(level)->precursorBC.nTRead*pow(2,-((real)level))*para->getTimeRatio(); + real nextTime = para->getParD(level)->precursorBC.timeStepsBetweenReads*pow(2,-((real)level))*para->getTimeRatio(); for(auto reader : para->getParH(level)->transientBCInputFileReader) { reader->getNextData(para->getParH(level)->precursorBC.next, para->getParH(level)->precursorBC.numberOfPrecursorNodes, nextTime); diff --git a/src/gpu/VirtualFluids_GPU/Factories/BoundaryConditionFactory.h b/src/gpu/VirtualFluids_GPU/Factories/BoundaryConditionFactory.h index f7cd7cc1a0dd1fc941547b5d102719a82eef9ca2..c6877cbfeffe5b32c0c2d336e46b02d68cd946a3 100644 --- a/src/gpu/VirtualFluids_GPU/Factories/BoundaryConditionFactory.h +++ b/src/gpu/VirtualFluids_GPU/Factories/BoundaryConditionFactory.h @@ -48,7 +48,7 @@ class Parameter; using boundaryCondition = std::function<void(LBMSimulationParameter *, QforBoundaryConditions *)>; using boundaryConditionWithParameter = std::function<void(Parameter *, QforBoundaryConditions *, const int level)>; -using precursorBoundaryConditionFunc = std::function<void(LBMSimulationParameter *, QforPrecursorBoundaryConditions *, real tRatio, real velocityRatio)>; +using precursorBoundaryConditionFunc = std::function<void(LBMSimulationParameter *, QforPrecursorBoundaryConditions *, real timeRatio, real velocityRatio)>; class BoundaryConditionFactory { diff --git a/src/gpu/VirtualFluids_GPU/GPU/CudaMemoryManager.cpp b/src/gpu/VirtualFluids_GPU/GPU/CudaMemoryManager.cpp index 253960f96d13cde56caffcf803b8c9650880a001..8807c22300af92b294976c3d59317f4118e0b72e 100644 --- a/src/gpu/VirtualFluids_GPU/GPU/CudaMemoryManager.cpp +++ b/src/gpu/VirtualFluids_GPU/GPU/CudaMemoryManager.cpp @@ -530,7 +530,7 @@ void CudaMemoryManager::cudaCopyProcessNeighborXIndex(int lev, unsigned int proc void 
CudaMemoryManager::cudaCopyProcessNeighborXFsHD(int lev, unsigned int processNeighbor, const unsigned int &memsizeFsRecv) { - if (!parameter->getStreamManager()->streamIsRegistered(CudaStreamIndex::Border)) + if (!parameter->getStreamManager()->streamIsRegistered(CudaStreamIndex::SubDomainBorder)) checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->recvProcessNeighborX[processNeighbor].f[0], parameter->getParH(lev)->recvProcessNeighborX[processNeighbor].f[0], parameter->getD3Qxx() * memsizeFsRecv, @@ -540,12 +540,12 @@ void CudaMemoryManager::cudaCopyProcessNeighborXFsHD(int lev, unsigned int proce parameter->getParH(lev)->recvProcessNeighborX[processNeighbor].f[0], parameter->getD3Qxx() * memsizeFsRecv, cudaMemcpyHostToDevice, - parameter->getStreamManager()->getStream(CudaStreamIndex::Border))); + parameter->getStreamManager()->getStream(CudaStreamIndex::SubDomainBorder))); } void CudaMemoryManager::cudaCopyProcessNeighborXFsDH(int lev, unsigned int processNeighbor, const unsigned int &memsizeFsSend) { - if (!parameter->getStreamManager()->streamIsRegistered(CudaStreamIndex::Border)) + if (!parameter->getStreamManager()->streamIsRegistered(CudaStreamIndex::SubDomainBorder)) checkCudaErrors( cudaMemcpy(parameter->getParH(lev)->sendProcessNeighborX[processNeighbor].f[0], parameter->getParD(lev)->sendProcessNeighborX[processNeighbor].f[0], parameter->getD3Qxx() * memsizeFsSend, @@ -555,7 +555,7 @@ void CudaMemoryManager::cudaCopyProcessNeighborXFsDH(int lev, unsigned int proce parameter->getParD(lev)->sendProcessNeighborX[processNeighbor].f[0], parameter->getD3Qxx() * memsizeFsSend, cudaMemcpyDeviceToHost, - parameter->getStreamManager()->getStream(CudaStreamIndex::Border))); + parameter->getStreamManager()->getStream(CudaStreamIndex::SubDomainBorder))); } void CudaMemoryManager::cudaFreeProcessNeighborX(int lev, unsigned int processNeighbor) { @@ -600,7 +600,7 @@ void CudaMemoryManager::cudaCopyProcessNeighborYIndex(int lev, unsigned int proc } void 
CudaMemoryManager::cudaCopyProcessNeighborYFsHD(int lev, unsigned int processNeighbor, const unsigned int &memsizeFsRecv) { - if (!parameter->getStreamManager()->streamIsRegistered(CudaStreamIndex::Border)) + if (!parameter->getStreamManager()->streamIsRegistered(CudaStreamIndex::SubDomainBorder)) checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->recvProcessNeighborY[processNeighbor].f[0], parameter->getParH(lev)->recvProcessNeighborY[processNeighbor].f[0], parameter->getD3Qxx() * memsizeFsRecv, @@ -610,11 +610,11 @@ void CudaMemoryManager::cudaCopyProcessNeighborYFsHD(int lev, unsigned int proce parameter->getParH(lev)->recvProcessNeighborY[processNeighbor].f[0], parameter->getD3Qxx() * memsizeFsRecv, cudaMemcpyHostToDevice, - parameter->getStreamManager()->getStream(CudaStreamIndex::Border))); + parameter->getStreamManager()->getStream(CudaStreamIndex::SubDomainBorder))); } void CudaMemoryManager::cudaCopyProcessNeighborYFsDH(int lev, unsigned int processNeighbor, const unsigned int &memsizeFsSend) { - if (!parameter->getStreamManager()->streamIsRegistered(CudaStreamIndex::Border)) + if (!parameter->getStreamManager()->streamIsRegistered(CudaStreamIndex::SubDomainBorder)) checkCudaErrors( cudaMemcpy(parameter->getParH(lev)->sendProcessNeighborY[processNeighbor].f[0], parameter->getParD(lev)->sendProcessNeighborY[processNeighbor].f[0], parameter->getD3Qxx() * memsizeFsSend, @@ -624,7 +624,7 @@ void CudaMemoryManager::cudaCopyProcessNeighborYFsDH(int lev, unsigned int proce parameter->getParD(lev)->sendProcessNeighborY[processNeighbor].f[0], parameter->getD3Qxx() * memsizeFsSend, cudaMemcpyDeviceToHost, - parameter->getStreamManager()->getStream(CudaStreamIndex::Border))); + parameter->getStreamManager()->getStream(CudaStreamIndex::SubDomainBorder))); } void CudaMemoryManager::cudaFreeProcessNeighborY(int lev, unsigned int processNeighbor) { @@ -670,7 +670,7 @@ void CudaMemoryManager::cudaCopyProcessNeighborZIndex(int lev, unsigned int proc void 
CudaMemoryManager::cudaCopyProcessNeighborZFsHD(int lev, unsigned int processNeighbor, const unsigned int &memsizeFsRecv) { - if (!parameter->getStreamManager()->streamIsRegistered(CudaStreamIndex::Border)) + if (!parameter->getStreamManager()->streamIsRegistered(CudaStreamIndex::SubDomainBorder)) checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->recvProcessNeighborZ[processNeighbor].f[0], parameter->getParH(lev)->recvProcessNeighborZ[processNeighbor].f[0], parameter->getD3Qxx() * memsizeFsRecv, @@ -680,12 +680,12 @@ void CudaMemoryManager::cudaCopyProcessNeighborZFsHD(int lev, unsigned int proce parameter->getParH(lev)->recvProcessNeighborZ[processNeighbor].f[0], parameter->getD3Qxx() * memsizeFsRecv, cudaMemcpyHostToDevice, - parameter->getStreamManager()->getStream(CudaStreamIndex::Border))); + parameter->getStreamManager()->getStream(CudaStreamIndex::SubDomainBorder))); } void CudaMemoryManager::cudaCopyProcessNeighborZFsDH(int lev, unsigned int processNeighbor, const unsigned int &memsizeFsSend) { - if (!parameter->getStreamManager()->streamIsRegistered(CudaStreamIndex::Border)) + if (!parameter->getStreamManager()->streamIsRegistered(CudaStreamIndex::SubDomainBorder)) checkCudaErrors( cudaMemcpy(parameter->getParH(lev)->sendProcessNeighborZ[processNeighbor].f[0], parameter->getParD(lev)->sendProcessNeighborZ[processNeighbor].f[0], parameter->getD3Qxx() * memsizeFsSend, @@ -695,7 +695,7 @@ void CudaMemoryManager::cudaCopyProcessNeighborZFsDH(int lev, unsigned int proce parameter->getParD(lev)->sendProcessNeighborZ[processNeighbor].f[0], parameter->getD3Qxx() * memsizeFsSend, cudaMemcpyDeviceToHost, - parameter->getStreamManager()->getStream(CudaStreamIndex::Border))); + parameter->getStreamManager()->getStream(CudaStreamIndex::SubDomainBorder))); } void CudaMemoryManager::cudaFreeProcessNeighborZ(int lev, unsigned int processNeighbor) { diff --git a/src/gpu/VirtualFluids_GPU/GPU/GPU_Kernels.cuh b/src/gpu/VirtualFluids_GPU/GPU/GPU_Kernels.cuh index 
b35e01eb997723eb12f5645857bc230536fe97fe..9ef5057bbf12af887d438e49b974402136fc60c1 100644 --- a/src/gpu/VirtualFluids_GPU/GPU/GPU_Kernels.cuh +++ b/src/gpu/VirtualFluids_GPU/GPU/GPU_Kernels.cuh @@ -1266,7 +1266,7 @@ __global__ void QPrecursorDeviceCompZeroPress( int* subgridDistanceIndices, real velocityX, real velocityY, real velocityZ, - real tRatio, + real timeRatio, real velocityRatio, unsigned long long numberOfLBnodes, bool isEvenTimestep); @@ -1292,7 +1292,7 @@ __global__ void PrecursorDeviceEQ27( int* subgridDistanceIndices, real velocityX, real velocityY, real velocityZ, - real tRatio, + real timeRatio, real velocityRatio, unsigned long long numberOfLBnodes, bool isEvenTimestep); @@ -1314,7 +1314,7 @@ __global__ void PrecursorDeviceDistributions( int* subgridDistanceIndices, real* weightsSB, real* fsLast, real* fsNext, - real tRatio, + real timeRatio, unsigned long long numberOfLBnodes, bool isEvenTimestep); __global__ void QPrecursorDeviceDistributions( int* subgridDistanceIndices, @@ -1336,7 +1336,7 @@ __global__ void QPrecursorDeviceDistributions( int* subgridDistanceIndices, real* weightsSB, real* fsLast, real* fsNext, - real tRatio, + real timeRatio, unsigned long long numberOfLBnodes, bool isEvenTimestep); diff --git a/src/gpu/VirtualFluids_GPU/GPU/LBMKernel.cu b/src/gpu/VirtualFluids_GPU/GPU/LBMKernel.cu index ae51ce98e449bd3251f6b4df2a3d430815892a8a..06628edb5e3d47f5b0ed44ce61ec6da134c551da 100644 --- a/src/gpu/VirtualFluids_GPU/GPU/LBMKernel.cu +++ b/src/gpu/VirtualFluids_GPU/GPU/LBMKernel.cu @@ -3208,7 +3208,7 @@ void VelSchlaffer27( unsigned int numberOfThreads, getLastCudaError("VelSchlaff27 execution failed"); } ////////////////////////////////////////////////////////////////////////// -void QPrecursorDevCompZeroPress(LBMSimulationParameter* parameterDevice, QforPrecursorBoundaryConditions* boundaryCondition, real tRatio, real velocityRatio) +void QPrecursorDevCompZeroPress(LBMSimulationParameter* parameterDevice, 
QforPrecursorBoundaryConditions* boundaryCondition, real timeRatio, real velocityRatio) { vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(parameterDevice->numberofthreads, boundaryCondition->numberOfBCnodes); @@ -3220,12 +3220,12 @@ void QPrecursorDevCompZeroPress(LBMSimulationParameter* parameterDevice, QforPre boundaryCondition->weightsNT, boundaryCondition->weightsNB, boundaryCondition->weightsST, boundaryCondition->weightsSB, boundaryCondition->last, boundaryCondition->current, boundaryCondition->velocityX, boundaryCondition->velocityY, boundaryCondition->velocityZ, - tRatio, velocityRatio, parameterDevice->numberOfNodes, parameterDevice->isEvenTimestep); + timeRatio, velocityRatio, parameterDevice->numberOfNodes, parameterDevice->isEvenTimestep); getLastCudaError("QPrecursorDeviceCompZeroPress execution failed"); } ////////////////////////////////////////////////////////////////////////// -void PrecursorDevEQ27( LBMSimulationParameter* parameterDevice, QforPrecursorBoundaryConditions* boundaryCondition, real tRatio, real velocityRatio) +void PrecursorDevEQ27( LBMSimulationParameter* parameterDevice, QforPrecursorBoundaryConditions* boundaryCondition, real timeRatio, real velocityRatio) { vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(parameterDevice->numberofthreads, boundaryCondition->numberOfBCnodes); @@ -3236,12 +3236,12 @@ void PrecursorDevEQ27( LBMSimulationParameter* parameterDevice, QforPrecursorBou boundaryCondition->weightsNT, boundaryCondition->weightsNB, boundaryCondition->weightsST, boundaryCondition->weightsSB, boundaryCondition->last, boundaryCondition->current, boundaryCondition->velocityX, boundaryCondition->velocityY, boundaryCondition->velocityZ, - tRatio, velocityRatio, parameterDevice->numberOfNodes, parameterDevice->isEvenTimestep); + timeRatio, velocityRatio, parameterDevice->numberOfNodes, parameterDevice->isEvenTimestep); getLastCudaError("PrecursorDeviceEQ27 execution failed"); } 
////////////////////////////////////////////////////////////////////////// -void PrecursorDevDistributions( LBMSimulationParameter* parameterDevice, QforPrecursorBoundaryConditions* boundaryCondition, real tRatio, real velocityRatio) +void PrecursorDevDistributions( LBMSimulationParameter* parameterDevice, QforPrecursorBoundaryConditions* boundaryCondition, real timeRatio, real velocityRatio) { vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(parameterDevice->numberofthreads, boundaryCondition->numberOfBCnodes); @@ -3251,13 +3251,13 @@ void PrecursorDevDistributions( LBMSimulationParameter* parameterDevice, QforPre boundaryCondition->planeNeighborNT, boundaryCondition->planeNeighborNB, boundaryCondition->planeNeighborST, boundaryCondition->planeNeighborSB, boundaryCondition->weightsNT, boundaryCondition->weightsNB, boundaryCondition->weightsST, boundaryCondition->weightsSB, boundaryCondition->last, boundaryCondition->current, - tRatio, parameterDevice->numberOfNodes, parameterDevice->isEvenTimestep); + timeRatio, parameterDevice->numberOfNodes, parameterDevice->isEvenTimestep); getLastCudaError("QPrecursorDeviceCompZeroPress execution failed"); } ////////////////////////////////////////////////////////////////////////// -void QPrecursorDevDistributions( LBMSimulationParameter* parameterDevice, QforPrecursorBoundaryConditions* boundaryCondition, real tRatio, real velocityRatio) +void QPrecursorDevDistributions( LBMSimulationParameter* parameterDevice, QforPrecursorBoundaryConditions* boundaryCondition, real timeRatio, real velocityRatio) { vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(parameterDevice->numberofthreads, boundaryCondition->numberOfBCnodes); @@ -3267,7 +3267,7 @@ void QPrecursorDevDistributions( LBMSimulationParameter* parameterDevice, QforPr boundaryCondition->planeNeighborNT, boundaryCondition->planeNeighborNB, boundaryCondition->planeNeighborST, boundaryCondition->planeNeighborSB, boundaryCondition->weightsNT, boundaryCondition->weightsNB, 
boundaryCondition->weightsST, boundaryCondition->weightsSB, boundaryCondition->last, boundaryCondition->current, - tRatio, parameterDevice->numberOfNodes, parameterDevice->isEvenTimestep); + timeRatio, parameterDevice->numberOfNodes, parameterDevice->isEvenTimestep); getLastCudaError("QPrecursorDeviceCompZeroPress execution failed"); } diff --git a/src/gpu/VirtualFluids_GPU/GPU/PrecursorBCs27.cu b/src/gpu/VirtualFluids_GPU/GPU/PrecursorBCs27.cu index 9c7958cdc6bc783ba8aadcd0e19877f491fad085..f089a32955af615bfa5744857e1c7d76e2bb42d3 100644 --- a/src/gpu/VirtualFluids_GPU/GPU/PrecursorBCs27.cu +++ b/src/gpu/VirtualFluids_GPU/GPU/PrecursorBCs27.cu @@ -32,7 +32,7 @@ __global__ void QPrecursorDeviceCompZeroPress( int* subgridDistanceIndices, real velocityX, real velocityY, real velocityZ, - real tRatio, + real timeRatio, real velocityRatio, unsigned long long numberOfLBnodes, bool isEvenTimestep) @@ -89,9 +89,9 @@ __global__ void QPrecursorDeviceCompZeroPress( int* subgridDistanceIndices, } // if(k==16300)s printf("%f %f %f\n", vxLastInterpd, vyLastInterpd, vzLastInterpd); - real VeloX = (velocityX + (1.f-tRatio)*vxLastInterpd + tRatio*vxNextInterpd)/velocityRatio; - real VeloY = (velocityY + (1.f-tRatio)*vyLastInterpd + tRatio*vyNextInterpd)/velocityRatio; - real VeloZ = (velocityZ + (1.f-tRatio)*vzLastInterpd + tRatio*vzNextInterpd)/velocityRatio; + real VeloX = (velocityX + (1.f-timeRatio)*vxLastInterpd + timeRatio*vxNextInterpd)/velocityRatio; + real VeloY = (velocityY + (1.f-timeRatio)*vyLastInterpd + timeRatio*vyNextInterpd)/velocityRatio; + real VeloZ = (velocityZ + (1.f-timeRatio)*vzLastInterpd + timeRatio*vzNextInterpd)/velocityRatio; // From here on just a copy of QVelDeviceCompZeroPress //////////////////////////////////////////////////////////////////////////////// @@ -445,7 +445,7 @@ __global__ void PrecursorDeviceEQ27( int* subgridDistanceIndices, real velocityX, real velocityY, real velocityZ, - real tRatio, + real timeRatio, real velocityRatio, unsigned 
long long numberOfLBnodes, bool isEvenTimestep) @@ -502,9 +502,9 @@ __global__ void PrecursorDeviceEQ27( int* subgridDistanceIndices, } // if(k==16300) printf("%f %f %f\n", vxLastInterpd, vyLastInterpd, vzLastInterpd); - real VeloX = (velocityX + (1.f-tRatio)*vxLastInterpd + tRatio*vxNextInterpd)/velocityRatio; - real VeloY = (velocityY + (1.f-tRatio)*vyLastInterpd + tRatio*vyNextInterpd)/velocityRatio; - real VeloZ = (velocityZ + (1.f-tRatio)*vzLastInterpd + tRatio*vzNextInterpd)/velocityRatio; + real VeloX = (velocityX + (1.f-timeRatio)*vxLastInterpd + timeRatio*vxNextInterpd)/velocityRatio; + real VeloY = (velocityY + (1.f-timeRatio)*vyLastInterpd + timeRatio*vyNextInterpd)/velocityRatio; + real VeloZ = (velocityZ + (1.f-timeRatio)*vzLastInterpd + timeRatio*vzNextInterpd)/velocityRatio; // From here on just a copy of QVelDeviceCompZeroPress //////////////////////////////////////////////////////////////////////////////// @@ -666,7 +666,7 @@ __global__ void PrecursorDeviceDistributions( int* subgridDistanceIndices, real* weightsSB, real* fsLast, real* fsNext, - real tRatio, + real timeRatio, unsigned long long numberOfLBnodes, bool isEvenTimestep) { @@ -793,15 +793,15 @@ __global__ void PrecursorDeviceDistributions( int* subgridDistanceIndices, unsigned int ktne = KQK; // unsigned int kbsw = neighborZ[ksw]; - dist.f[DIR_P00][ke] = f0LastInterp*(1.f-tRatio) + f0NextInterp*tRatio; - dist.f[DIR_PP0][kne] = f1LastInterp*(1.f-tRatio) + f1NextInterp*tRatio; - dist.f[DIR_PM0][kse] = f2LastInterp*(1.f-tRatio) + f2NextInterp*tRatio; - dist.f[DIR_P0P][kte] = f3LastInterp*(1.f-tRatio) + f3NextInterp*tRatio; - dist.f[DIR_P0M][kbe] = f4LastInterp*(1.f-tRatio) + f4NextInterp*tRatio; - dist.f[DIR_PPP][ktne] = f5LastInterp*(1.f-tRatio) + f5NextInterp*tRatio; - dist.f[DIR_PMP][ktse] = f6LastInterp*(1.f-tRatio) + f6NextInterp*tRatio; - dist.f[DIR_PPM][kbne] = f7LastInterp*(1.f-tRatio) + f7NextInterp*tRatio; - dist.f[DIR_PMM][kbse] = f8LastInterp*(1.f-tRatio) + f8NextInterp*tRatio; 
+ dist.f[DIR_P00][ke] = f0LastInterp*(1.f-timeRatio) + f0NextInterp*timeRatio; + dist.f[DIR_PP0][kne] = f1LastInterp*(1.f-timeRatio) + f1NextInterp*timeRatio; + dist.f[DIR_PM0][kse] = f2LastInterp*(1.f-timeRatio) + f2NextInterp*timeRatio; + dist.f[DIR_P0P][kte] = f3LastInterp*(1.f-timeRatio) + f3NextInterp*timeRatio; + dist.f[DIR_P0M][kbe] = f4LastInterp*(1.f-timeRatio) + f4NextInterp*timeRatio; + dist.f[DIR_PPP][ktne] = f5LastInterp*(1.f-timeRatio) + f5NextInterp*timeRatio; + dist.f[DIR_PMP][ktse] = f6LastInterp*(1.f-timeRatio) + f6NextInterp*timeRatio; + dist.f[DIR_PPM][kbne] = f7LastInterp*(1.f-timeRatio) + f7NextInterp*timeRatio; + dist.f[DIR_PMM][kbse] = f8LastInterp*(1.f-timeRatio) + f8NextInterp*timeRatio; } //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// @@ -825,7 +825,7 @@ __global__ void QPrecursorDeviceDistributions( int* subgridDistanceIndices, real* weightsSB, real* fsLast, real* fsNext, - real tRatio, + real timeRatio, unsigned long long numberOfLBnodes, bool isEvenTimestep) { @@ -955,15 +955,15 @@ __global__ void QPrecursorDeviceDistributions( int* subgridDistanceIndices, getPointersToSubgridDistances(qs, subgridDistances, sizeQ); real q; - q = qs.q[DIR_P00][k]; if(q>= c0o1 && q <= c1o1) dist.f[DIR_P00][ke] = f0LastInterp*(1.f-tRatio) + f0NextInterp*tRatio; - q = qs.q[DIR_PP0][k]; if(q>= c0o1 && q <= c1o1) dist.f[DIR_PP0][kne] = f1LastInterp*(1.f-tRatio) + f1NextInterp*tRatio; - q = qs.q[DIR_PM0][k]; if(q>= c0o1 && q <= c1o1) dist.f[DIR_PM0][kse] = f2LastInterp*(1.f-tRatio) + f2NextInterp*tRatio; - q = qs.q[DIR_P0P][k]; if(q>= c0o1 && q <= c1o1) dist.f[DIR_P0P][kte] = f3LastInterp*(1.f-tRatio) + f3NextInterp*tRatio; - q = qs.q[DIR_P0M][k]; if(q>= c0o1 && q <= c1o1) dist.f[DIR_P0M][kbe] = f4LastInterp*(1.f-tRatio) + f4NextInterp*tRatio; - q = qs.q[DIR_PPP][k]; if(q>= c0o1 && q <= c1o1) dist.f[DIR_PPP][ktne] = f5LastInterp*(1.f-tRatio) + 
f5NextInterp*tRatio; - q = qs.q[DIR_PMP][k]; if(q>= c0o1 && q <= c1o1) dist.f[DIR_PMP][ktse] = f6LastInterp*(1.f-tRatio) + f6NextInterp*tRatio; - q = qs.q[DIR_PPM][k]; if(q>= c0o1 && q <= c1o1) dist.f[DIR_PPM][kbne] = f7LastInterp*(1.f-tRatio) + f7NextInterp*tRatio; - q = qs.q[DIR_PMM][k]; if(q>= c0o1 && q <= c1o1) dist.f[DIR_PMM][kbse] = f8LastInterp*(1.f-tRatio) + f8NextInterp*tRatio; + q = qs.q[DIR_P00][k]; if(q>= c0o1 && q <= c1o1) dist.f[DIR_P00][ke] = f0LastInterp*(1.f-timeRatio) + f0NextInterp*timeRatio; + q = qs.q[DIR_PP0][k]; if(q>= c0o1 && q <= c1o1) dist.f[DIR_PP0][kne] = f1LastInterp*(1.f-timeRatio) + f1NextInterp*timeRatio; + q = qs.q[DIR_PM0][k]; if(q>= c0o1 && q <= c1o1) dist.f[DIR_PM0][kse] = f2LastInterp*(1.f-timeRatio) + f2NextInterp*timeRatio; + q = qs.q[DIR_P0P][k]; if(q>= c0o1 && q <= c1o1) dist.f[DIR_P0P][kte] = f3LastInterp*(1.f-timeRatio) + f3NextInterp*timeRatio; + q = qs.q[DIR_P0M][k]; if(q>= c0o1 && q <= c1o1) dist.f[DIR_P0M][kbe] = f4LastInterp*(1.f-timeRatio) + f4NextInterp*timeRatio; + q = qs.q[DIR_PPP][k]; if(q>= c0o1 && q <= c1o1) dist.f[DIR_PPP][ktne] = f5LastInterp*(1.f-timeRatio) + f5NextInterp*timeRatio; + q = qs.q[DIR_PMP][k]; if(q>= c0o1 && q <= c1o1) dist.f[DIR_PMP][ktse] = f6LastInterp*(1.f-timeRatio) + f6NextInterp*timeRatio; + q = qs.q[DIR_PPM][k]; if(q>= c0o1 && q <= c1o1) dist.f[DIR_PPM][kbne] = f7LastInterp*(1.f-timeRatio) + f7NextInterp*timeRatio; + q = qs.q[DIR_PMM][k]; if(q>= c0o1 && q <= c1o1) dist.f[DIR_PMM][kbse] = f8LastInterp*(1.f-timeRatio) + f8NextInterp*timeRatio; } //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernel.h b/src/gpu/VirtualFluids_GPU/Kernel/Kernel.h index 7eaf0050b41a0aa67de0a4e592b8a123d9ef3e9f..50b4460d774010ea7d7b98cfa6fa505cdfeb88c2 100644 --- a/src/gpu/VirtualFluids_GPU/Kernel/Kernel.h +++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernel.h @@ -16,7 
+16,7 @@ class Kernel public: virtual ~Kernel() = default; virtual void run() = 0; - virtual void runOnIndices(const unsigned int *indices, unsigned int size_indices, CollisionTemplate collisionTemplate, CudaStreamIndex streamIdx=CudaStreamIndex::Legacy) = 0; //if stream == -1: run on default stream + virtual void runOnIndices(const unsigned int *indices, unsigned int size_indices, CollisionTemplate collisionTemplate, CudaStreamIndex streamIdx=CudaStreamIndex::Legacy) = 0; virtual bool checkParameter() = 0; virtual std::vector<PreProcessorType> getPreProcessorTypes() = 0; diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17/CumulantK17.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17/CumulantK17.cu index 54dd11142d9c16063d58330cfe7351394bdfe51c..70b0c4352afee850a4e17243979268bd126b7b4a 100644 --- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17/CumulantK17.cu +++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17/CumulantK17.cu @@ -15,23 +15,21 @@ std::shared_ptr< CumulantK17<turbulenceModel> > CumulantK17<turbulenceModel>::ge template<TurbulenceModel turbulenceModel> void CumulantK17<turbulenceModel>::run() { - LB_Kernel_CumulantK17 < turbulenceModel, false, false > <<< cudaGrid.grid, cudaGrid.threads >>>( para->getParD(level)->omega, - para->getParD(level)->typeOfGridNode, - para->getParD(level)->neighborX, para->getParD(level)->neighborY, para->getParD(level)->neighborZ, - para->getParD(level)->distributions.f[0], - para->getParD(level)->rho, - para->getParD(level)->velocityX, para->getParD(level)->velocityY, para->getParD(level)->velocityZ, - para->getParD(level)->turbViscosity, - para->getSGSConstant(), - (unsigned long)para->getParD(level)->numberOfNodes, - level, - para->getIsBodyForce(), - para->getForcesDev(), - para->getParD(level)->forceX_SP, para->getParD(level)->forceY_SP, 
para->getParD(level)->forceZ_SP, - para->getQuadricLimitersDev(), - para->getParD(level)->isEvenTimestep, - para->getParD(level)->taggedFluidNodeIndices[CollisionTemplate::Default], - para->getParD(level)->numberOfTaggedFluidNodes[CollisionTemplate::Default]); + LB_Kernel_CumulantK17 < turbulenceModel, false, false > <<< cudaGrid.grid, cudaGrid.threads >>>( para->getParD(level)->omega, + para->getParD(level)->neighborX, para->getParD(level)->neighborY, para->getParD(level)->neighborZ, + para->getParD(level)->distributions.f[0], + para->getParD(level)->rho, + para->getParD(level)->velocityX, para->getParD(level)->velocityY, para->getParD(level)->velocityZ, + para->getParD(level)->turbViscosity, + para->getSGSConstant(), + (unsigned long)para->getParD(level)->numberOfNodes, + level, + para->getForcesDev(), + para->getParD(level)->forceX_SP, para->getParD(level)->forceY_SP, para->getParD(level)->forceZ_SP, + para->getQuadricLimitersDev(), + para->getParD(level)->isEvenTimestep, + para->getParD(level)->taggedFluidNodeIndices[CollisionTemplate::Default], + para->getParD(level)->numberOfTaggedFluidNodes[CollisionTemplate::Default]); getLastCudaError("LB_Kernel_CumulantK17 execution failed"); } @@ -44,89 +42,75 @@ void CumulantK17<turbulenceModel>::runOnIndices( const unsigned int *indices, un switch (collisionTemplate) { case CollisionTemplate::Default: - LB_Kernel_CumulantK17 < turbulenceModel, false, false > <<< cudaGrid.grid, cudaGrid.threads, 0, stream >>>( - para->getParD(level)->omega, - para->getParD(level)->typeOfGridNode, - para->getParD(level)->neighborX, para->getParD(level)->neighborY, para->getParD(level)->neighborZ, - para->getParD(level)->distributions.f[0], - para->getParD(level)->rho, - para->getParD(level)->velocityX, para->getParD(level)->velocityY, para->getParD(level)->velocityZ, - para->getParD(level)->turbViscosity, - para->getSGSConstant(), - (unsigned long)para->getParD(level)->numberOfNodes, - level, - para->getIsBodyForce(), - 
para->getForcesDev(), - para->getParD(level)->forceX_SP, para->getParD(level)->forceY_SP, para->getParD(level)->forceZ_SP, - para->getQuadricLimitersDev(), - para->getParD(level)->isEvenTimestep, - indices, - size_indices); + LB_Kernel_CumulantK17 < turbulenceModel, false, false > <<< cudaGrid.grid, cudaGrid.threads, 0, stream >>>(para->getParD(level)->omega, + para->getParD(level)->neighborX, para->getParD(level)->neighborY, para->getParD(level)->neighborZ, + para->getParD(level)->distributions.f[0], + para->getParD(level)->rho, + para->getParD(level)->velocityX, para->getParD(level)->velocityY, para->getParD(level)->velocityZ, + para->getParD(level)->turbViscosity, + para->getSGSConstant(), + (unsigned long)para->getParD(level)->numberOfNodes, + level, + para->getForcesDev(), + para->getParD(level)->forceX_SP, para->getParD(level)->forceY_SP, para->getParD(level)->forceZ_SP, + para->getQuadricLimitersDev(), + para->getParD(level)->isEvenTimestep, + indices, + size_indices); break; case CollisionTemplate::WriteMacroVars: - LB_Kernel_CumulantK17 < turbulenceModel, true, false > <<< cudaGrid.grid, cudaGrid.threads, 0, stream >>>( - para->getParD(level)->omega, - para->getParD(level)->typeOfGridNode, - para->getParD(level)->neighborX, para->getParD(level)->neighborY, para->getParD(level)->neighborZ, - para->getParD(level)->distributions.f[0], - para->getParD(level)->rho, - para->getParD(level)->velocityX, para->getParD(level)->velocityY, para->getParD(level)->velocityZ, - para->getParD(level)->turbViscosity, - para->getSGSConstant(), - (unsigned long)para->getParD(level)->numberOfNodes, - level, - para->getIsBodyForce(), - para->getForcesDev(), - para->getParD(level)->forceX_SP, para->getParD(level)->forceY_SP, para->getParD(level)->forceZ_SP, - para->getQuadricLimitersDev(), - para->getParD(level)->isEvenTimestep, - indices, - size_indices); + LB_Kernel_CumulantK17 < turbulenceModel, true, false > <<< cudaGrid.grid, cudaGrid.threads, 0, stream >>>( 
para->getParD(level)->omega, + para->getParD(level)->neighborX, para->getParD(level)->neighborY, para->getParD(level)->neighborZ, + para->getParD(level)->distributions.f[0], + para->getParD(level)->rho, + para->getParD(level)->velocityX, para->getParD(level)->velocityY, para->getParD(level)->velocityZ, + para->getParD(level)->turbViscosity, + para->getSGSConstant(), + (unsigned long)para->getParD(level)->numberOfNodes, + level, + para->getForcesDev(), + para->getParD(level)->forceX_SP, para->getParD(level)->forceY_SP, para->getParD(level)->forceZ_SP, + para->getQuadricLimitersDev(), + para->getParD(level)->isEvenTimestep, + indices, + size_indices); break; - case CollisionTemplate::Border: + case CollisionTemplate::SubDomainBorder: case CollisionTemplate::AllFeatures: - LB_Kernel_CumulantK17 < turbulenceModel, true, true > <<< cudaGrid.grid, cudaGrid.threads, 0, stream >>>( - para->getParD(level)->omega, - para->getParD(level)->typeOfGridNode, - para->getParD(level)->neighborX, para->getParD(level)->neighborY, para->getParD(level)->neighborZ, - para->getParD(level)->distributions.f[0], - para->getParD(level)->rho, - para->getParD(level)->velocityX, para->getParD(level)->velocityY, para->getParD(level)->velocityZ, - para->getParD(level)->turbViscosity, - para->getSGSConstant(), - (unsigned long)para->getParD(level)->numberOfNodes, - level, - para->getIsBodyForce(), - para->getForcesDev(), - para->getParD(level)->forceX_SP, para->getParD(level)->forceY_SP, para->getParD(level)->forceZ_SP, - para->getQuadricLimitersDev(), - para->getParD(level)->isEvenTimestep, - indices, - size_indices); - break; - case CollisionTemplate::ApplyBodyForce: - LB_Kernel_CumulantK17 < turbulenceModel, false, true > <<< cudaGrid.grid, cudaGrid.threads, 0, stream >>>( - para->getParD(level)->omega, - para->getParD(level)->typeOfGridNode, - para->getParD(level)->neighborX, para->getParD(level)->neighborY, para->getParD(level)->neighborZ, - para->getParD(level)->distributions.f[0], - 
para->getParD(level)->rho, - para->getParD(level)->velocityX, para->getParD(level)->velocityY, para->getParD(level)->velocityZ, - para->getParD(level)->turbViscosity, - para->getSGSConstant(), - (unsigned long)para->getParD(level)->numberOfNodes, - level, - para->getIsBodyForce(), - para->getForcesDev(), - para->getParD(level)->forceX_SP, para->getParD(level)->forceY_SP, para->getParD(level)->forceZ_SP, - para->getQuadricLimitersDev(), - para->getParD(level)->isEvenTimestep, - indices, - size_indices); - break; - default: + LB_Kernel_CumulantK17 < turbulenceModel, true, true > <<< cudaGrid.grid, cudaGrid.threads, 0, stream >>>( para->getParD(level)->omega, + para->getParD(level)->neighborX, para->getParD(level)->neighborY, para->getParD(level)->neighborZ, + para->getParD(level)->distributions.f[0], + para->getParD(level)->rho, + para->getParD(level)->velocityX, para->getParD(level)->velocityY, para->getParD(level)->velocityZ, + para->getParD(level)->turbViscosity, + para->getSGSConstant(), + (unsigned long)para->getParD(level)->numberOfNodes, + level, + para->getForcesDev(), + para->getParD(level)->forceX_SP, para->getParD(level)->forceY_SP, para->getParD(level)->forceZ_SP, + para->getQuadricLimitersDev(), + para->getParD(level)->isEvenTimestep, + indices, + size_indices); + break; case CollisionTemplate::ApplyBodyForce: + LB_Kernel_CumulantK17 < turbulenceModel, false, true > <<< cudaGrid.grid, cudaGrid.threads, 0, stream >>>( para->getParD(level)->omega, + para->getParD(level)->neighborX, para->getParD(level)->neighborY, para->getParD(level)->neighborZ, + para->getParD(level)->distributions.f[0], + para->getParD(level)->rho, + para->getParD(level)->velocityX, para->getParD(level)->velocityY, para->getParD(level)->velocityZ, + para->getParD(level)->turbViscosity, + para->getSGSConstant(), + (unsigned long)para->getParD(level)->numberOfNodes, + level, + para->getForcesDev(), + para->getParD(level)->forceX_SP, para->getParD(level)->forceY_SP, 
para->getParD(level)->forceZ_SP, + para->getQuadricLimitersDev(), + para->getParD(level)->isEvenTimestep, + indices, + size_indices); + break; default: throw std::runtime_error("Invalid CollisionTemplate in CumulantK17::runOnIndices()"); break; } diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17/CumulantK17_Device.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17/CumulantK17_Device.cu index d2b679395a673b5b1d74e3f499a1d53b154d4b89..34a444230019a1c6cfacb18e12fd73607fb2e09a 100644 --- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17/CumulantK17_Device.cu +++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17/CumulantK17_Device.cu @@ -58,7 +58,6 @@ using namespace vf::lbm::dir; template<TurbulenceModel turbulenceModel, bool writeMacroscopicVariables, bool applyBodyForce> __global__ void LB_Kernel_CumulantK17( real omega_in, - uint* typeOfGridNode, uint* neighborX, uint* neighborY, uint* neighborZ, @@ -71,7 +70,6 @@ __global__ void LB_Kernel_CumulantK17( real SGSconstant, unsigned long numberOfLBnodes, int level, - bool bodyForce, real* forces, real* bodyForceX, real* bodyForceY, @@ -695,34 +693,34 @@ __global__ void LB_Kernel_CumulantK17( (dist.f[DIR_MMM])[k_MMM] = f_PPP; } -template __global__ void LB_Kernel_CumulantK17 < TurbulenceModel::AMD, true, true > ( real omega_in, uint* typeOfGridNode, uint* neighborX, uint* neighborY, uint* neighborZ, real* distributions, real* rho, real* vx, real* vy, real* vz, real* turbulentViscosity, real SGSconstant, unsigned long size_Mat, int level, bool bodyForce, real* forces, real* bodyForceX, real* bodyForceY, real* bodyForceZ, real* quadricLimiters, bool isEvenTimestep, const uint *fluidNodeIndices, uint numberOfFluidNodes); +template __global__ void LB_Kernel_CumulantK17 < TurbulenceModel::AMD, true, true > ( real omega_in, uint* neighborX, uint* 
neighborY, uint* neighborZ, real* distributions, real* rho, real* vx, real* vy, real* vz, real* turbulentViscosity, real SGSconstant, unsigned long size_Mat, int level, real* forces, real* bodyForceX, real* bodyForceY, real* bodyForceZ, real* quadricLimiters, bool isEvenTimestep, const uint *fluidNodeIndices, uint numberOfFluidNodes); -template __global__ void LB_Kernel_CumulantK17 < TurbulenceModel::Smagorinsky, true, true > ( real omega_in, uint* typeOfGridNode, uint* neighborX, uint* neighborY, uint* neighborZ, real* distributions, real* rho, real* vx, real* vy, real* vz, real* turbulentViscosity, real SGSconstant, unsigned long size_Mat, int level, bool bodyForce, real* forces, real* bodyForceX, real* bodyForceY, real* bodyForceZ, real* quadricLimiters, bool isEvenTimestep, const uint *fluidNodeIndices, uint numberOfFluidNodes); +template __global__ void LB_Kernel_CumulantK17 < TurbulenceModel::Smagorinsky, true, true > ( real omega_in, uint* neighborX, uint* neighborY, uint* neighborZ, real* distributions, real* rho, real* vx, real* vy, real* vz, real* turbulentViscosity, real SGSconstant, unsigned long size_Mat, int level, real* forces, real* bodyForceX, real* bodyForceY, real* bodyForceZ, real* quadricLimiters, bool isEvenTimestep, const uint *fluidNodeIndices, uint numberOfFluidNodes); -template __global__ void LB_Kernel_CumulantK17 < TurbulenceModel::QR, true, true > ( real omega_in, uint* typeOfGridNode, uint* neighborX, uint* neighborY, uint* neighborZ, real* distributions, real* rho, real* vx, real* vy, real* vz, real* turbulentViscosity, real SGSconstant, unsigned long size_Mat, int level, bool bodyForce, real* forces, real* bodyForceX, real* bodyForceY, real* bodyForceZ, real* quadricLimiters, bool isEvenTimestep, const uint *fluidNodeIndices, uint numberOfFluidNodes); +template __global__ void LB_Kernel_CumulantK17 < TurbulenceModel::QR, true, true > ( real omega_in, uint* neighborX, uint* neighborY, uint* neighborZ, real* distributions, real* rho, 
real* vx, real* vy, real* vz, real* turbulentViscosity, real SGSconstant, unsigned long size_Mat, int level, real* forces, real* bodyForceX, real* bodyForceY, real* bodyForceZ, real* quadricLimiters, bool isEvenTimestep, const uint *fluidNodeIndices, uint numberOfFluidNodes); -template __global__ void LB_Kernel_CumulantK17 < TurbulenceModel::None, true, true > ( real omega_in, uint* typeOfGridNode, uint* neighborX, uint* neighborY, uint* neighborZ, real* distributions, real* rho, real* vx, real* vy, real* vz, real* turbulentViscosity, real SGSconstant, unsigned long size_Mat, int level, bool bodyForce, real* forces, real* bodyForceX, real* bodyForceY, real* bodyForceZ, real* quadricLimiters, bool isEvenTimestep, const uint *fluidNodeIndices, uint numberOfFluidNodes); +template __global__ void LB_Kernel_CumulantK17 < TurbulenceModel::None, true, true > ( real omega_in, uint* neighborX, uint* neighborY, uint* neighborZ, real* distributions, real* rho, real* vx, real* vy, real* vz, real* turbulentViscosity, real SGSconstant, unsigned long size_Mat, int level, real* forces, real* bodyForceX, real* bodyForceY, real* bodyForceZ, real* quadricLimiters, bool isEvenTimestep, const uint *fluidNodeIndices, uint numberOfFluidNodes); -template __global__ void LB_Kernel_CumulantK17 < TurbulenceModel::AMD, true, false > ( real omega_in, uint* typeOfGridNode, uint* neighborX, uint* neighborY, uint* neighborZ, real* distributions, real* rho, real* vx, real* vy, real* vz, real* turbulentViscosity, real SGSconstant, unsigned long size_Mat, int level, bool bodyForce, real* forces, real* bodyForceX, real* bodyForceY, real* bodyForceZ, real* quadricLimiters, bool isEvenTimestep, const uint *fluidNodeIndices, uint numberOfFluidNodes); +template __global__ void LB_Kernel_CumulantK17 < TurbulenceModel::AMD, true, false > ( real omega_in, uint* neighborX, uint* neighborY, uint* neighborZ, real* distributions, real* rho, real* vx, real* vy, real* vz, real* turbulentViscosity, real 
SGSconstant, unsigned long size_Mat, int level, real* forces, real* bodyForceX, real* bodyForceY, real* bodyForceZ, real* quadricLimiters, bool isEvenTimestep, const uint *fluidNodeIndices, uint numberOfFluidNodes); -template __global__ void LB_Kernel_CumulantK17 < TurbulenceModel::Smagorinsky, true, false > ( real omega_in, uint* typeOfGridNode, uint* neighborX, uint* neighborY, uint* neighborZ, real* distributions, real* rho, real* vx, real* vy, real* vz, real* turbulentViscosity, real SGSconstant, unsigned long size_Mat, int level, bool bodyForce, real* forces, real* bodyForceX, real* bodyForceY, real* bodyForceZ, real* quadricLimiters, bool isEvenTimestep, const uint *fluidNodeIndices, uint numberOfFluidNodes); +template __global__ void LB_Kernel_CumulantK17 < TurbulenceModel::Smagorinsky, true, false > ( real omega_in, uint* neighborX, uint* neighborY, uint* neighborZ, real* distributions, real* rho, real* vx, real* vy, real* vz, real* turbulentViscosity, real SGSconstant, unsigned long size_Mat, int level, real* forces, real* bodyForceX, real* bodyForceY, real* bodyForceZ, real* quadricLimiters, bool isEvenTimestep, const uint *fluidNodeIndices, uint numberOfFluidNodes); -template __global__ void LB_Kernel_CumulantK17 < TurbulenceModel::QR, true, false > ( real omega_in, uint* typeOfGridNode, uint* neighborX, uint* neighborY, uint* neighborZ, real* distributions, real* rho, real* vx, real* vy, real* vz, real* turbulentViscosity, real SGSconstant, unsigned long size_Mat, int level, bool bodyForce, real* forces, real* bodyForceX, real* bodyForceY, real* bodyForceZ, real* quadricLimiters, bool isEvenTimestep, const uint *fluidNodeIndices, uint numberOfFluidNodes); +template __global__ void LB_Kernel_CumulantK17 < TurbulenceModel::QR, true, false > ( real omega_in, uint* neighborX, uint* neighborY, uint* neighborZ, real* distributions, real* rho, real* vx, real* vy, real* vz, real* turbulentViscosity, real SGSconstant, unsigned long size_Mat, int level, real* 
forces, real* bodyForceX, real* bodyForceY, real* bodyForceZ, real* quadricLimiters, bool isEvenTimestep, const uint *fluidNodeIndices, uint numberOfFluidNodes); -template __global__ void LB_Kernel_CumulantK17 < TurbulenceModel::None, true, false > ( real omega_in, uint* typeOfGridNode, uint* neighborX, uint* neighborY, uint* neighborZ, real* distributions, real* rho, real* vx, real* vy, real* vz, real* turbulentViscosity, real SGSconstant, unsigned long size_Mat, int level, bool bodyForce, real* forces, real* bodyForceX, real* bodyForceY, real* bodyForceZ, real* quadricLimiters, bool isEvenTimestep, const uint *fluidNodeIndices, uint numberOfFluidNodes); +template __global__ void LB_Kernel_CumulantK17 < TurbulenceModel::None, true, false > ( real omega_in, uint* neighborX, uint* neighborY, uint* neighborZ, real* distributions, real* rho, real* vx, real* vy, real* vz, real* turbulentViscosity, real SGSconstant, unsigned long size_Mat, int level, real* forces, real* bodyForceX, real* bodyForceY, real* bodyForceZ, real* quadricLimiters, bool isEvenTimestep, const uint *fluidNodeIndices, uint numberOfFluidNodes); -template __global__ void LB_Kernel_CumulantK17 < TurbulenceModel::AMD, false, true > ( real omega_in, uint* typeOfGridNode, uint* neighborX, uint* neighborY, uint* neighborZ, real* distributions, real* rho, real* vx, real* vy, real* vz, real* turbulentViscosity, real SGSconstant, unsigned long size_Mat, int level, bool bodyForce, real* forces, real* bodyForceX, real* bodyForceY, real* bodyForceZ, real* quadricLimiters, bool isEvenTimestep, const uint *fluidNodeIndices, uint numberOfFluidNodes); +template __global__ void LB_Kernel_CumulantK17 < TurbulenceModel::AMD, false, true > ( real omega_in, uint* neighborX, uint* neighborY, uint* neighborZ, real* distributions, real* rho, real* vx, real* vy, real* vz, real* turbulentViscosity, real SGSconstant, unsigned long size_Mat, int level, real* forces, real* bodyForceX, real* bodyForceY, real* bodyForceZ, real* 
quadricLimiters, bool isEvenTimestep, const uint *fluidNodeIndices, uint numberOfFluidNodes); -template __global__ void LB_Kernel_CumulantK17 < TurbulenceModel::Smagorinsky, false, true > ( real omega_in, uint* typeOfGridNode, uint* neighborX, uint* neighborY, uint* neighborZ, real* distributions, real* rho, real* vx, real* vy, real* vz, real* turbulentViscosity, real SGSconstant, unsigned long size_Mat, int level, bool bodyForce, real* forces, real* bodyForceX, real* bodyForceY, real* bodyForceZ, real* quadricLimiters, bool isEvenTimestep, const uint *fluidNodeIndices, uint numberOfFluidNodes); +template __global__ void LB_Kernel_CumulantK17 < TurbulenceModel::Smagorinsky, false, true > ( real omega_in, uint* neighborX, uint* neighborY, uint* neighborZ, real* distributions, real* rho, real* vx, real* vy, real* vz, real* turbulentViscosity, real SGSconstant, unsigned long size_Mat, int level, real* forces, real* bodyForceX, real* bodyForceY, real* bodyForceZ, real* quadricLimiters, bool isEvenTimestep, const uint *fluidNodeIndices, uint numberOfFluidNodes); -template __global__ void LB_Kernel_CumulantK17 < TurbulenceModel::QR, false, true > ( real omega_in, uint* typeOfGridNode, uint* neighborX, uint* neighborY, uint* neighborZ, real* distributions, real* rho, real* vx, real* vy, real* vz, real* turbulentViscosity, real SGSconstant, unsigned long size_Mat, int level, bool bodyForce, real* forces, real* bodyForceX, real* bodyForceY, real* bodyForceZ, real* quadricLimiters, bool isEvenTimestep, const uint *fluidNodeIndices, uint numberOfFluidNodes); +template __global__ void LB_Kernel_CumulantK17 < TurbulenceModel::QR, false, true > ( real omega_in, uint* neighborX, uint* neighborY, uint* neighborZ, real* distributions, real* rho, real* vx, real* vy, real* vz, real* turbulentViscosity, real SGSconstant, unsigned long size_Mat, int level, real* forces, real* bodyForceX, real* bodyForceY, real* bodyForceZ, real* quadricLimiters, bool isEvenTimestep, const uint 
*fluidNodeIndices, uint numberOfFluidNodes); -template __global__ void LB_Kernel_CumulantK17 < TurbulenceModel::None, false, true > ( real omega_in, uint* typeOfGridNode, uint* neighborX, uint* neighborY, uint* neighborZ, real* distributions, real* rho, real* vx, real* vy, real* vz, real* turbulentViscosity, real SGSconstant, unsigned long size_Mat, int level, bool bodyForce, real* forces, real* bodyForceX, real* bodyForceY, real* bodyForceZ, real* quadricLimiters, bool isEvenTimestep, const uint *fluidNodeIndices, uint numberOfFluidNodes); +template __global__ void LB_Kernel_CumulantK17 < TurbulenceModel::None, false, true > ( real omega_in, uint* neighborX, uint* neighborY, uint* neighborZ, real* distributions, real* rho, real* vx, real* vy, real* vz, real* turbulentViscosity, real SGSconstant, unsigned long size_Mat, int level, real* forces, real* bodyForceX, real* bodyForceY, real* bodyForceZ, real* quadricLimiters, bool isEvenTimestep, const uint *fluidNodeIndices, uint numberOfFluidNodes); -template __global__ void LB_Kernel_CumulantK17 < TurbulenceModel::AMD, false, false > ( real omega_in, uint* typeOfGridNode, uint* neighborX, uint* neighborY, uint* neighborZ, real* distributions, real* rho, real* vx, real* vy, real* vz, real* turbulentViscosity, real SGSconstant, unsigned long size_Mat, int level, bool bodyForce, real* forces, real* bodyForceX, real* bodyForceY, real* bodyForceZ, real* quadricLimiters, bool isEvenTimestep, const uint *fluidNodeIndices, uint numberOfFluidNodes); +template __global__ void LB_Kernel_CumulantK17 < TurbulenceModel::AMD, false, false > ( real omega_in, uint* neighborX, uint* neighborY, uint* neighborZ, real* distributions, real* rho, real* vx, real* vy, real* vz, real* turbulentViscosity, real SGSconstant, unsigned long size_Mat, int level, real* forces, real* bodyForceX, real* bodyForceY, real* bodyForceZ, real* quadricLimiters, bool isEvenTimestep, const uint *fluidNodeIndices, uint numberOfFluidNodes); -template __global__ 
void LB_Kernel_CumulantK17 < TurbulenceModel::Smagorinsky, false, false > ( real omega_in, uint* typeOfGridNode, uint* neighborX, uint* neighborY, uint* neighborZ, real* distributions, real* rho, real* vx, real* vy, real* vz, real* turbulentViscosity, real SGSconstant, unsigned long size_Mat, int level, bool bodyForce, real* forces, real* bodyForceX, real* bodyForceY, real* bodyForceZ, real* quadricLimiters, bool isEvenTimestep, const uint *fluidNodeIndices, uint numberOfFluidNodes); +template __global__ void LB_Kernel_CumulantK17 < TurbulenceModel::Smagorinsky, false, false > ( real omega_in, uint* neighborX, uint* neighborY, uint* neighborZ, real* distributions, real* rho, real* vx, real* vy, real* vz, real* turbulentViscosity, real SGSconstant, unsigned long size_Mat, int level, real* forces, real* bodyForceX, real* bodyForceY, real* bodyForceZ, real* quadricLimiters, bool isEvenTimestep, const uint *fluidNodeIndices, uint numberOfFluidNodes); -template __global__ void LB_Kernel_CumulantK17 < TurbulenceModel::QR, false, false > ( real omega_in, uint* typeOfGridNode, uint* neighborX, uint* neighborY, uint* neighborZ, real* distributions, real* rho, real* vx, real* vy, real* vz, real* turbulentViscosity, real SGSconstant, unsigned long size_Mat, int level, bool bodyForce, real* forces, real* bodyForceX, real* bodyForceY, real* bodyForceZ, real* quadricLimiters, bool isEvenTimestep, const uint *fluidNodeIndices, uint numberOfFluidNodes); +template __global__ void LB_Kernel_CumulantK17 < TurbulenceModel::QR, false, false > ( real omega_in, uint* neighborX, uint* neighborY, uint* neighborZ, real* distributions, real* rho, real* vx, real* vy, real* vz, real* turbulentViscosity, real SGSconstant, unsigned long size_Mat, int level, real* forces, real* bodyForceX, real* bodyForceY, real* bodyForceZ, real* quadricLimiters, bool isEvenTimestep, const uint *fluidNodeIndices, uint numberOfFluidNodes); -template __global__ void LB_Kernel_CumulantK17 < TurbulenceModel::None, 
false, false > ( real omega_in, uint* typeOfGridNode, uint* neighborX, uint* neighborY, uint* neighborZ, real* distributions, real* rho, real* vx, real* vy, real* vz, real* turbulentViscosity, real SGSconstant, unsigned long size_Mat, int level, bool bodyForce, real* forces, real* bodyForceX, real* bodyForceY, real* bodyForceZ, real* quadricLimiters, bool isEvenTimestep, const uint *fluidNodeIndices, uint numberOfFluidNodes); \ No newline at end of file +template __global__ void LB_Kernel_CumulantK17 < TurbulenceModel::None, false, false > ( real omega_in, uint* neighborX, uint* neighborY, uint* neighborZ, real* distributions, real* rho, real* vx, real* vy, real* vz, real* turbulentViscosity, real SGSconstant, unsigned long size_Mat, int level, real* forces, real* bodyForceX, real* bodyForceY, real* bodyForceZ, real* quadricLimiters, bool isEvenTimestep, const uint *fluidNodeIndices, uint numberOfFluidNodes); \ No newline at end of file diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17/CumulantK17_Device.cuh b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17/CumulantK17_Device.cuh index 9d56098f5053cc765b12eab0244a890d18209b1b..b8cc9543e9b531c5aa90cb2961416a6cbc52377d 100644 --- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17/CumulantK17_Device.cuh +++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17/CumulantK17_Device.cuh @@ -6,7 +6,6 @@ template< TurbulenceModel turbulenceModel, bool writeMacroscopicVariables, bool applyBodyForce > __global__ void LB_Kernel_CumulantK17( real omega_in, - uint* typeOfGridNode, uint* neighborX, uint* neighborY, uint* neighborZ, @@ -19,7 +18,6 @@ template< TurbulenceModel turbulenceModel, bool writeMacroscopicVariables, bool real SGSconstant, unsigned long numberOfLBnodes, int level, - bool bodyForce, real* forces, real* bodyForceX, real* bodyForceY, diff 
--git a/src/gpu/VirtualFluids_GPU/KernelManager/BCKernelManager.cpp b/src/gpu/VirtualFluids_GPU/KernelManager/BCKernelManager.cpp index 29341d2430ec6eff46065b10c1167f9e871edab8..1310cf09e5ce05270e88cd3a53e4910816bd7628 100644 --- a/src/gpu/VirtualFluids_GPU/KernelManager/BCKernelManager.cpp +++ b/src/gpu/VirtualFluids_GPU/KernelManager/BCKernelManager.cpp @@ -398,16 +398,16 @@ void BCKernelManager::runPrecursorBCKernelPost(int level, uint t, CudaMemoryMana uint t_level = para->getTimeStep(level, t, true); - uint lastTime = (para->getParD(level)->precursorBC.nPrecursorReads-2)*para->getParD(level)->precursorBC.nTRead; // timestep currently loaded into last arrays - uint currentTime = (para->getParD(level)->precursorBC.nPrecursorReads-1)*para->getParD(level)->precursorBC.nTRead; // timestep currently loaded into current arrays - uint nextTime = para->getParD(level)->precursorBC.nPrecursorReads *para->getParD(level)->precursorBC.nTRead; // timestep currently loaded into next arrays + uint lastTime = (para->getParD(level)->precursorBC.nPrecursorReads-2)*para->getParD(level)->precursorBC.timeStepsBetweenReads; // timestep currently loaded into last arrays + uint currentTime = (para->getParD(level)->precursorBC.nPrecursorReads-1)*para->getParD(level)->precursorBC.timeStepsBetweenReads; // timestep currently loaded into current arrays + uint nextTime = para->getParD(level)->precursorBC.nPrecursorReads *para->getParD(level)->precursorBC.timeStepsBetweenReads; // timestep currently loaded into next arrays if(t_level>=currentTime) { //cycle time lastTime = currentTime; currentTime = nextTime; - nextTime += para->getParD(level)->precursorBC.nTRead; + nextTime += para->getParD(level)->precursorBC.timeStepsBetweenReads; //cycle pointers real* tmp = para->getParD(level)->precursorBC.last; @@ -426,6 +426,6 @@ void BCKernelManager::runPrecursorBCKernelPost(int level, uint t, CudaMemoryMana para->getParH(level)->precursorBC.nPrecursorReads++; } - real tRatio = 
real(t_level-lastTime)/para->getParD(level)->precursorBC.nTRead; + real tRatio = real(t_level-lastTime)/para->getParD(level)->precursorBC.timeStepsBetweenReads; precursorBoundaryConditionPost(para->getParD(level).get(), &para->getParD(level)->precursorBC, tRatio, para->getVelocityRatio()); } \ No newline at end of file diff --git a/src/gpu/VirtualFluids_GPU/LBM/LB.h b/src/gpu/VirtualFluids_GPU/LBM/LB.h index 904471123a895d65f33c8d91e6c5e5ed0296a9f6..c07769d5c06f4443335564511d449e82462f046f 100644 --- a/src/gpu/VirtualFluids_GPU/LBM/LB.h +++ b/src/gpu/VirtualFluids_GPU/LBM/LB.h @@ -75,9 +75,9 @@ enum class CollisionTemplate { //! - AllFeatures: collision \w write out macroscopic variables AND read and apply body force AllFeatures, //! - Border: collision on border nodes - Border + SubDomainBorder }; -constexpr std::initializer_list<CollisionTemplate> all_CollisionTemplate = { CollisionTemplate::Default, CollisionTemplate::WriteMacroVars, CollisionTemplate::ApplyBodyForce, CollisionTemplate::AllFeatures, CollisionTemplate::Border}; +constexpr std::initializer_list<CollisionTemplate> all_CollisionTemplate = { CollisionTemplate::Default, CollisionTemplate::WriteMacroVars, CollisionTemplate::ApplyBodyForce, CollisionTemplate::AllFeatures, CollisionTemplate::SubDomainBorder}; constexpr std::initializer_list<CollisionTemplate> bulk_CollisionTemplate = { CollisionTemplate::Default, CollisionTemplate::WriteMacroVars, CollisionTemplate::ApplyBodyForce, CollisionTemplate::AllFeatures}; struct InitCondition @@ -237,7 +237,7 @@ typedef struct QforPrecursorBC{ int sizeQ; int numberOfPrecursorNodes=0; uint nPrecursorReads=0; - uint nTRead; + uint timeStepsBetweenReads; size_t numberOfQuantities; real* q27[27]; uint* planeNeighborNT, *planeNeighborNB, *planeNeighborST, *planeNeighborSB; diff --git a/src/gpu/VirtualFluids_GPU/LBM/Simulation.cpp b/src/gpu/VirtualFluids_GPU/LBM/Simulation.cpp index b120dd20531e2a4f672f16f59b4e2dfe6decaaf1..84ab84ff93fa7706bcc27d7e61a18f580f3c8dbe 100644
--- a/src/gpu/VirtualFluids_GPU/LBM/Simulation.cpp +++ b/src/gpu/VirtualFluids_GPU/LBM/Simulation.cpp @@ -139,7 +139,7 @@ void Simulation::init(GridProvider &gridProvider, BoundaryConditionFactory *bcFa ////////////////////////////////////////////////////////////////////////// // CUDA streams if (para->getUseStreams()) { - para->getStreamManager()->registerStream(CudaStreamIndex::Border); + para->getStreamManager()->registerStream(CudaStreamIndex::SubDomainBorder); para->getStreamManager()->registerStream(CudaStreamIndex::Bulk); para->getStreamManager()->launchStreams(); para->getStreamManager()->createCudaEvents(); diff --git a/src/gpu/VirtualFluids_GPU/Output/DistributionDebugInspector.h b/src/gpu/VirtualFluids_GPU/Output/DistributionDebugInspector.h index 73c6b9fa21999d577349401b31a6183e7b6d7f3b..53e030c0d16116a4edef0135f9ab435c853fd66b 100644 --- a/src/gpu/VirtualFluids_GPU/Output/DistributionDebugInspector.h +++ b/src/gpu/VirtualFluids_GPU/Output/DistributionDebugInspector.h @@ -1,5 +1,43 @@ -#ifndef FILE_WRITER_H -#define FILE_WRITER_H +//======================================================================================= +// ____ ____ __ ______ __________ __ __ __ __ +// \ \ | | | | | _ \ |___ ___| | | | | / \ | | +// \ \ | | | | | |_) | | | | | | | / \ | | +// \ \ | | | | | _ / | | | | | | / /\ \ | | +// \ \ | | | | | | \ \ | | | \__/ | / ____ \ | |____ +// \ \ | | |__| |__| \__\ |__| \________/ /__/ \__\ |_______| +// \ \ | | ________________________________________________________________ +// \ \ | | | ______________________________________________________________| +// \ \| | | | __ __ __ __ ______ _______ +// \ | | |_____ | | | | | | | | | _ \ / _____) +// \ | | _____| | | | | | | | | | | \ \ \_______ +// \ | | | | |_____ | \_/ | | | | |_/ / _____ | +// \ _____| |__| |________| \_______/ |__| |______/ (_______/ +// +// This file is part of VirtualFluids. 
VirtualFluids is free software: you can +// redistribute it and/or modify it under the terms of the GNU General Public +// License as published by the Free Software Foundation, either version 3 of +// the License, or (at your option) any later version. +// +// VirtualFluids is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// for more details. +// +// You should have received a copy of the GNU General Public License along +// with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>. +// +//! \file DistributionDebugInspector.h +//! \author Henrik Asmuth +//! \date 13/12/2022 +//! \brief Basic debugging class to print out f's in a certain area of the domain +//! +//! Basic debugging class. Needs to be directly added in UpdateGrid (could potentially also be added as a proper Probe in the future) +//! How to use: Define a part of the domain via min/max x, y, and z. The DistributionDebugInspector will print out all f's in that area. +//! 
+//======================================================================================= + +#ifndef DISTRIBUTION_INSPECTOR_H +#define DISTRIBUTION_INSPECTOR_H #include "Parameter/Parameter.h" diff --git a/src/gpu/VirtualFluids_GPU/Parameter/CudaStreamManager.cpp b/src/gpu/VirtualFluids_GPU/Parameter/CudaStreamManager.cpp index abe780470f660410ca0e63dff2b3d0f300e7dbda..3cc771e413134e90b0d09d8eeb6dfee791f8a1e2 100644 --- a/src/gpu/VirtualFluids_GPU/Parameter/CudaStreamManager.cpp +++ b/src/gpu/VirtualFluids_GPU/Parameter/CudaStreamManager.cpp @@ -29,8 +29,6 @@ //======================================================================================= #include "CudaStreamManager.h" #include <helper_cuda.h> -#include <cuda_runtime.h> -#include <cuda.h> #include <iostream> void CudaStreamManager::registerStream(CudaStreamIndex streamIndex) diff --git a/src/gpu/VirtualFluids_GPU/Parameter/CudaStreamManager.h b/src/gpu/VirtualFluids_GPU/Parameter/CudaStreamManager.h index 06fc5624771e2b2b713ae84c24116ac4c622ec9f..5c59bcd3a5e6178d6e70a63f803caf8e29f32604 100644 --- a/src/gpu/VirtualFluids_GPU/Parameter/CudaStreamManager.h +++ b/src/gpu/VirtualFluids_GPU/Parameter/CudaStreamManager.h @@ -39,7 +39,7 @@ enum class CudaStreamIndex { Legacy, Bulk, - Border, + SubDomainBorder, Precursor, ActuatorFarm }; diff --git a/src/gpu/VirtualFluids_GPU/Parameter/Parameter.h b/src/gpu/VirtualFluids_GPU/Parameter/Parameter.h index f83e50d52e609322780130c17ba9563062bf2834..86b7bc2a058d69ba878d4445953a4dc56e524027 100644 --- a/src/gpu/VirtualFluids_GPU/Parameter/Parameter.h +++ b/src/gpu/VirtualFluids_GPU/Parameter/Parameter.h @@ -372,12 +372,12 @@ struct LBMSimulationParameter { /////////////////////////////////////////////////////// std::map<CollisionTemplate, uint*> taggedFluidNodeIndices = {{CollisionTemplate::Default, nullptr}, - {CollisionTemplate::Border, nullptr}, + {CollisionTemplate::SubDomainBorder,nullptr}, {CollisionTemplate::WriteMacroVars, nullptr}, 
{CollisionTemplate::ApplyBodyForce, nullptr}, {CollisionTemplate::AllFeatures, nullptr}}; std::map<CollisionTemplate, uint > numberOfTaggedFluidNodes = {{CollisionTemplate::Default, 0}, - {CollisionTemplate::Border, 0}, + {CollisionTemplate::SubDomainBorder,0}, {CollisionTemplate::WriteMacroVars, 0}, {CollisionTemplate::ApplyBodyForce, 0}, {CollisionTemplate::AllFeatures, 0}};