diff --git a/gpu.cmake b/gpu.cmake index cc585bdb58f1fec771e5069dcdb55ecf55257156..5b175ca2a5fe7d289bd948e905ada612413333d2 100644 --- a/gpu.cmake +++ b/gpu.cmake @@ -25,23 +25,8 @@ add_subdirectory(src/gpu/GridGenerator) IF (BUILD_VF_GPU) add_subdirectory(src/gpu/VirtualFluids_GPU) - #add_subdirectory(targets/apps/LBM/lbmTest) - #add_subdirectory(targets/apps/LBM/metisTest) - #add_subdirectory(targets/apps/LBM/Basel) - #add_subdirectory(targets/apps/LBM/BaselNU) - #add_subdirectory(targets/apps/LBM/BaselMultiGPU) - - # add_subdirectory(apps/gpu/LBM/DrivenCavity) - # add_subdirectory(apps/gpu/LBM/SphereGPU) - #add_subdirectory(apps/gpu/LBM/WTG_RUB) - #add_subdirectory(apps/gpu/LBM/gridGeneratorTest) - #add_subdirectory(apps/gpu/LBM/TGV_3D) - #add_subdirectory(apps/gpu/LBM/TGV_3D_MultiGPU) - #add_subdirectory(apps/gpu/LBM/SphereScaling) - #add_subdirectory(apps/gpu/LBM/DrivenCavityMultiGPU) - #add_subdirectory(apps/gpu/LBM/MusselOyster) - #add_subdirectory(apps/gpu/LBM/Poiseuille) - #add_subdirectory(apps/gpu/LBM/ActuatorLine) + add_subdirectory(apps/gpu/LBM/DrivenCavity) + add_subdirectory(apps/gpu/LBM/SphereGPU) add_subdirectory(apps/gpu/LBM/BoundaryLayer) ELSE() MESSAGE( STATUS "exclude Virtual Fluids GPU." 
) diff --git a/src/gpu/GridGenerator/grid/BoundaryConditions/Side.cpp b/src/gpu/GridGenerator/grid/BoundaryConditions/Side.cpp index 6c7bf8ca1853826d83fb6a713ffe03716bd2cf9a..f99cdcda06f36152c0a3c5861ee35a98ba67ff78 100644 --- a/src/gpu/GridGenerator/grid/BoundaryConditions/Side.cpp +++ b/src/gpu/GridGenerator/grid/BoundaryConditions/Side.cpp @@ -40,6 +40,18 @@ using namespace gg; +std::vector<real> Side::getNormal() +{ + std::vector<real> normal; + if(this->getCoordinate()==X_INDEX) + normal = {(real)this->getDirection(), 0.0, 0.0}; + if(this->getCoordinate()==Y_INDEX) + normal = {0.0, (real)this->getDirection(), 0.0}; + if(this->getCoordinate()==Z_INDEX) + normal = {0.0, 0.0, (real)this->getDirection()}; + return normal; +} + void Side::addIndices(SPtr<Grid> grid, SPtr<BoundaryCondition> boundaryCondition, std::string coord, real constant, real startInner, real endInner, real startOuter, real endOuter) { @@ -49,11 +61,19 @@ void Side::addIndices(SPtr<Grid> grid, SPtr<BoundaryCondition> boundaryCondition { const uint index = getIndex(grid, coord, constant, v1, v2); - if ((index != INVALID_INDEX) && ( grid->getFieldEntry(index) == vf::gpu::FLUID - || grid->getFieldEntry(index) == vf::gpu::FLUID_CFC - || grid->getFieldEntry(index) == vf::gpu::FLUID_CFF - || grid->getFieldEntry(index) == vf::gpu::FLUID_FCC - || grid->getFieldEntry(index) == vf::gpu::FLUID_FCF )) + if ((index != INVALID_INDEX) && ( grid->getFieldEntry(index) == vf::gpu::FLUID + || grid->getFieldEntry(index) == vf::gpu::FLUID_CFC + || grid->getFieldEntry(index) == vf::gpu::FLUID_CFF + || grid->getFieldEntry(index) == vf::gpu::FLUID_FCC + || grid->getFieldEntry(index) == vf::gpu::FLUID_FCF + || grid->getFieldEntry(index) == vf::gpu::FLUID_FCF + + //! 
Enforce overlap of BCs on edge nodes + || grid->getFieldEntry(index) == vf::gpu::BC_PRESSURE + || grid->getFieldEntry(index) == vf::gpu::BC_VELOCITY + || grid->getFieldEntry(index) == vf::gpu::BC_NOSLIP + || grid->getFieldEntry(index) == vf::gpu::BC_SLIP + || grid->getFieldEntry(index) == vf::gpu::BC_STRESS )) { grid->setFieldEntry(index, boundaryCondition->getType()); boundaryCondition->indices.push_back(index); @@ -152,16 +172,21 @@ void Side::setQs(SPtr<Grid> grid, SPtr<BoundaryCondition> boundaryCondition, uin else neighborZ = grid->getLastFluidNode ( coords, 2, grid->getEndZ() ); } + //! Only setting q's that partially point in the Side-normal direction + bool alignedWithNormal = (this->getNormal()[0]*grid->getDirection()[dir * DIMENSION + 0]+ + this->getNormal()[1]*grid->getDirection()[dir * DIMENSION + 1]+ + this->getNormal()[2]*grid->getDirection()[dir * DIMENSION + 2] ) > 0; + uint neighborIndex = grid->transCoordToIndex( neighborX, neighborY, neighborZ ); - if( grid->getFieldEntry(neighborIndex) == vf::gpu::STOPPER_OUT_OF_GRID_BOUNDARY || - grid->getFieldEntry(neighborIndex) == vf::gpu::STOPPER_OUT_OF_GRID || - grid->getFieldEntry(neighborIndex) == vf::gpu::STOPPER_SOLID ) + if((grid->getFieldEntry(neighborIndex) == vf::gpu::STOPPER_OUT_OF_GRID_BOUNDARY || + grid->getFieldEntry(neighborIndex) == vf::gpu::STOPPER_OUT_OF_GRID || + grid->getFieldEntry(neighborIndex) == vf::gpu::STOPPER_SOLID) && + alignedWithNormal ) qNode[dir] = 0.5; else qNode[dir] = -1.0; - } - + boundaryCondition->qs.push_back(qNode); } @@ -260,7 +285,7 @@ void MY::addIndices(std::vector<SPtr<Grid> > grid, uint level, SPtr<BoundaryCond real coordinateNormal = grid[level]->getStartY() + grid[level]->getDelta(); if( coordinateNormal > grid[0]->getStartY() + grid[0]->getDelta() ) return; - + Side::addIndices(grid[level], boundaryCondition, "y", coordinateNormal, startInner, endInner, startOuter, endOuter); } diff --git a/src/gpu/GridGenerator/grid/BoundaryConditions/Side.h 
b/src/gpu/GridGenerator/grid/BoundaryConditions/Side.h index 6df6bfccc9a39b80de3ac43d057a03945d035b34..53a763bc562ee978042b28d24856fbcca256c5f9 100644 --- a/src/gpu/GridGenerator/grid/BoundaryConditions/Side.h +++ b/src/gpu/GridGenerator/grid/BoundaryConditions/Side.h @@ -72,15 +72,17 @@ public: virtual SideType whoAmI() const = 0; + std::vector<real> getNormal(); + protected: - static void addIndices(SPtr<Grid> grid, SPtr<gg::BoundaryCondition> boundaryCondition, std::string coord, real constant, + void addIndices(SPtr<Grid> grid, SPtr<gg::BoundaryCondition> boundaryCondition, std::string coord, real constant, real startInner, real endInner, real startOuter, real endOuter); static void setPressureNeighborIndices(SPtr<gg::BoundaryCondition> boundaryCondition, SPtr<Grid> grid, const uint index); static void setStressSamplingIndices(SPtr<gg::BoundaryCondition> boundaryCondition, SPtr<Grid> grid, const uint index); - static void setQs(SPtr<Grid> grid, SPtr<gg::BoundaryCondition> boundaryCondition, uint index); + void setQs(SPtr<Grid> grid, SPtr<gg::BoundaryCondition> boundaryCondition, uint index); private: static uint getIndex(SPtr<Grid> grid, std::string coord, real constant, real v1, real v2); diff --git a/src/gpu/VirtualFluids_GPU/BoundaryConditions/BoundaryConditionFactory.cpp b/src/gpu/VirtualFluids_GPU/BoundaryConditions/BoundaryConditionFactory.cpp index bff054eb174a0f5fa34119deedde6f1c9733d83c..01541f8a4a5faab8d70e9e26b815fa5f79fcaf4d 100644 --- a/src/gpu/VirtualFluids_GPU/BoundaryConditions/BoundaryConditionFactory.cpp +++ b/src/gpu/VirtualFluids_GPU/BoundaryConditions/BoundaryConditionFactory.cpp @@ -132,6 +132,8 @@ boundaryCondition BoundaryConditionFactory::getPressureBoundaryConditionPre() co case PressureBC::OutflowNonReflective: return QPressNoRhoDev27; break; + case PressureBC::OutflowNonReflectivePressureCorrection: + return QPressZeroRhoOutflowDev27; default: return nullptr; } diff --git 
a/src/gpu/VirtualFluids_GPU/BoundaryConditions/BoundaryConditionFactory.h b/src/gpu/VirtualFluids_GPU/BoundaryConditions/BoundaryConditionFactory.h index 52df58744641344c97e1b6f8ff964b75c22fec48..7babebecf183744bc6ace6e687f35fad1c7e2e92 100644 --- a/src/gpu/VirtualFluids_GPU/BoundaryConditions/BoundaryConditionFactory.h +++ b/src/gpu/VirtualFluids_GPU/BoundaryConditions/BoundaryConditionFactory.h @@ -109,6 +109,8 @@ public: PressureNonEquilibriumCompressible, //! - OutflowNonReflective = outflow boundary condition, should be combined with VelocityAndPressureCompressible OutflowNonReflective, + //! - OutflowNonReflectivePressureCorrection = like OutflowNonReflective, but also reduces pressure overshoot + OutflowNonReflectivePressureCorrection, //! - NotSpecified = the user did not set a boundary condition NotSpecified }; diff --git a/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderGenerator/GridGenerator.cpp b/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderGenerator/GridGenerator.cpp index 2335eb41f3a46ec04385220bf67930d7295162ad..ba7204f208184f9e53f00c232e254840ea26f7eb 100644 --- a/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderGenerator/GridGenerator.cpp +++ b/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderGenerator/GridGenerator.cpp @@ -122,6 +122,7 @@ void GridGenerator::allocArrays_BoundaryValues() //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// para->getParH(level)->pressureBC.numberOfBCnodes = 0; + para->getParD(level)->outflowPressureCorrectionFactor = para->getOutflowPressureCorrectionFactor(); if (numberOfPressureValues > 1) { blocks = (numberOfPressureValues / para->getParH(level)->numberofthreads) + 1; diff --git a/src/gpu/VirtualFluids_GPU/GPU/GPU_Interface.h b/src/gpu/VirtualFluids_GPU/GPU/GPU_Interface.h index f611075bf5d04e7c47837718cba562cdc4335515..cdd1ac934df3de18f29a15625923d07ba97a843c 
100644 --- a/src/gpu/VirtualFluids_GPU/GPU/GPU_Interface.h +++ b/src/gpu/VirtualFluids_GPU/GPU/GPU_Interface.h @@ -901,6 +901,8 @@ void QPressDevDirDepBot27(unsigned int numberOfThreads, void QPressNoRhoDev27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition); +void QPressZeroRhoOutflowDev27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition); + void QInflowScaleByPressDev27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition); void QPressDevOld27(unsigned int numberOfThreads, diff --git a/src/gpu/VirtualFluids_GPU/GPU/GPU_Kernels.cuh b/src/gpu/VirtualFluids_GPU/GPU/GPU_Kernels.cuh index 1a5c79fb9554d2fc64aea9b6594e79619d703be4..63453b6830fb1aed47251551ab6b3cde4810151c 100644 --- a/src/gpu/VirtualFluids_GPU/GPU/GPU_Kernels.cuh +++ b/src/gpu/VirtualFluids_GPU/GPU/GPU_Kernels.cuh @@ -1092,7 +1092,7 @@ __global__ void QPressDeviceDirDepBot27( real* rhoBC, bool isEvenTimestep); __global__ void QPressNoRhoDevice27( real* rhoBC, - real* DD, + real* distributions, int* k_Q, int* k_N, int numberOfBCnodes, @@ -1100,8 +1100,23 @@ __global__ void QPressNoRhoDevice27( real* rhoBC, unsigned int* neighborX, unsigned int* neighborY, unsigned int* neighborZ, - unsigned int size_Mat, - bool isEvenTimestep); + unsigned int numberOfLBnodes, + bool isEvenTimestep, + int direction); + +__global__ void QPressZeroRhoOutflowDevice27( real* rhoBC, + real* distributions, + int* k_Q, + int* k_N, + int numberOfBCnodes, + real om1, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + unsigned int numberOfLBnodes, + bool isEvenTimestep, + int direction, + real densityCorrectionFactor); __global__ void QInflowScaleByPressDevice27( real* rhoBC, real* DD, diff --git a/src/gpu/VirtualFluids_GPU/GPU/LBMKernel.cu b/src/gpu/VirtualFluids_GPU/GPU/LBMKernel.cu index ff70727d52286d287039e88f7b1956c10a6900f9..825785c18eaa7a23271f4da646cf4c037672732c 100644 --- 
a/src/gpu/VirtualFluids_GPU/GPU/LBMKernel.cu +++ b/src/gpu/VirtualFluids_GPU/GPU/LBMKernel.cu @@ -3355,7 +3355,7 @@ void QSlipDevCompTurbulentViscosity27(LBMSimulationParameter* parameterDevice, Q { dim3 grid = vf::cuda::getCudaGrid( parameterDevice->numberofthreads, boundaryCondition->numberOfBCnodes); dim3 threads(parameterDevice->numberofthreads, 1, 1 ); - + QSlipDeviceComp27TurbViscosity<<< grid, threads >>> ( parameterDevice->distributions.f[0], boundaryCondition->k, @@ -3395,7 +3395,7 @@ void QSlipDevComp27(LBMSimulationParameter* parameterDevice, QforBoundaryConditi { dim3 grid = vf::cuda::getCudaGrid( parameterDevice->numberofthreads, boundaryCondition->numberOfBCnodes); dim3 threads(parameterDevice->numberofthreads, 1, 1 ); - + QSlipDeviceComp27<<< grid, threads >>> ( parameterDevice->distributions.f[0], boundaryCondition->k, @@ -3804,10 +3804,33 @@ void QPressNoRhoDev27(LBMSimulationParameter* parameterDevice, QforBoundaryCondi parameterDevice->neighborY, parameterDevice->neighborZ, parameterDevice->numberOfNodes, - parameterDevice->isEvenTimestep); + parameterDevice->isEvenTimestep, + vf::lbm::dir::DIR_P00); getLastCudaError("QPressNoRhoDevice27 execution failed"); } ////////////////////////////////////////////////////////////////////////// +void QPressZeroRhoOutflowDev27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition) +{ + dim3 grid = vf::cuda::getCudaGrid( parameterDevice->numberofthreads, boundaryCondition->numberOfBCnodes); + dim3 threads(parameterDevice->numberofthreads, 1, 1 ); + + QPressZeroRhoOutflowDevice27<<< grid, threads >>> ( + boundaryCondition->RhoBC, + parameterDevice->distributions.f[0], + boundaryCondition->k, + boundaryCondition->kN, + boundaryCondition->numberOfBCnodes, + parameterDevice->omega, + parameterDevice->neighborX, + parameterDevice->neighborY, + parameterDevice->neighborZ, + parameterDevice->numberOfNodes, + parameterDevice->isEvenTimestep, + vf::lbm::dir::DIR_P00, + 
parameterDevice->outflowPressureCorrectionFactor); + getLastCudaError("QPressZeroRhoOutflowDev27 execution failed"); +} +////////////////////////////////////////////////////////////////////////// void QInflowScaleByPressDev27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition) { dim3 grid = vf::cuda::getCudaGrid( parameterDevice->numberofthreads, boundaryCondition->numberOfBCnodes); diff --git a/src/gpu/VirtualFluids_GPU/GPU/PressBCs27.cu b/src/gpu/VirtualFluids_GPU/GPU/PressBCs27.cu index ccb2ce79c63515e59e4f9ae75016f44ced71a170..29e82196bdc2a22f03306b97a1ffd1bb6d5bc8a4 100644 --- a/src/gpu/VirtualFluids_GPU/GPU/PressBCs27.cu +++ b/src/gpu/VirtualFluids_GPU/GPU/PressBCs27.cu @@ -2,6 +2,9 @@ #include "LBM/LB.h" #include "lbm/constants/D3Q27.h" #include "lbm/constants/NumericConstants.h" +#include "lbm/MacroscopicQuantities.h" +#include "Kernel/Utilities/DistributionHelper.cuh" + #include "KernelUtilities.h" using namespace vf::lbm::constant; @@ -2793,12 +2796,14 @@ __global__ void QPressDeviceDirDepBot27( real* rhoBC, - - +__host__ __device__ real computeOutflowDistribution(const real* const &f, const real* const &f1, const int dir, const real cs) +{ + return f1[dir] * cs + (c1o1 - cs) * f[dir]; +} //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -__global__ void QPressNoRhoDevice27( real* rhoBC, - real* DD, +__global__ void QPressNoRhoDevice27( real* rhoBC, + real* distributions, int* k_Q, int* k_N, int numberOfBCnodes, @@ -2806,238 +2811,176 @@ __global__ void QPressNoRhoDevice27( real* rhoBC, unsigned int* neighborX, unsigned int* neighborY, unsigned int* neighborZ, - unsigned int size_Mat, - bool isEvenTimestep) + unsigned int numberOfLBnodes, + bool isEvenTimestep, + int direction) { //////////////////////////////////////////////////////////////////////////////// - const unsigned x = threadIdx.x; // Globaler x-Index - const 
unsigned y = blockIdx.x; // Globaler y-Index - const unsigned z = blockIdx.y; // Globaler z-Index - const unsigned nx = blockDim.x; - const unsigned ny = gridDim.x; - const unsigned k = nx*(ny*z + y) + x; + const unsigned k = vf::gpu::getNodeIndex(); ////////////////////////////////////////////////////////////////////////// - if(k<numberOfBCnodes) - { - //////////////////////////////////////////////////////////////////////////////// - //index - unsigned int KQK = k_Q[k]; - //unsigned int kzero= KQK; - unsigned int ke = KQK; - unsigned int kw = neighborX[KQK]; - unsigned int kn = KQK; - unsigned int ks = neighborY[KQK]; - unsigned int kt = KQK; - unsigned int kb = neighborZ[KQK]; - unsigned int ksw = neighborY[kw]; - unsigned int kne = KQK; - unsigned int kse = ks; - unsigned int knw = kw; - unsigned int kbw = neighborZ[kw]; - unsigned int kte = KQK; - unsigned int kbe = kb; - unsigned int ktw = kw; - unsigned int kbs = neighborZ[ks]; - unsigned int ktn = KQK; - unsigned int kbn = kb; - unsigned int kts = ks; - unsigned int ktse = ks; - unsigned int kbnw = kbw; - unsigned int ktnw = kw; - unsigned int kbse = kbs; - unsigned int ktsw = ksw; - unsigned int kbne = kb; - unsigned int ktne = KQK; - unsigned int kbsw = neighborZ[ksw]; - //////////////////////////////////////////////////////////////////////////////// - //index1 - unsigned int K1QK = k_N[k]; - //unsigned int k1zero= K1QK; - unsigned int k1e = K1QK; - unsigned int k1w = neighborX[K1QK]; - unsigned int k1n = K1QK; - unsigned int k1s = neighborY[K1QK]; - unsigned int k1t = K1QK; - unsigned int k1b = neighborZ[K1QK]; - unsigned int k1sw = neighborY[k1w]; - unsigned int k1ne = K1QK; - unsigned int k1se = k1s; - unsigned int k1nw = k1w; - unsigned int k1bw = neighborZ[k1w]; - unsigned int k1te = K1QK; - unsigned int k1be = k1b; - unsigned int k1tw = k1w; - unsigned int k1bs = neighborZ[k1s]; - unsigned int k1tn = K1QK; - unsigned int k1bn = k1b; - unsigned int k1ts = k1s; - unsigned int k1tse = k1s; - unsigned 
int k1bnw = k1bw; - unsigned int k1tnw = k1w; - unsigned int k1bse = k1bs; - unsigned int k1tsw = k1sw; - unsigned int k1bne = k1b; - unsigned int k1tne = K1QK; - unsigned int k1bsw = neighborZ[k1sw]; - //////////////////////////////////////////////////////////////////////////////// - Distributions27 D; - if (isEvenTimestep==true) - { - D.f[DIR_P00 ] = &DD[DIR_P00 *size_Mat]; - D.f[DIR_M00 ] = &DD[DIR_M00 *size_Mat]; - D.f[DIR_0P0 ] = &DD[DIR_0P0 *size_Mat]; - D.f[DIR_0M0 ] = &DD[DIR_0M0 *size_Mat]; - D.f[DIR_00P ] = &DD[DIR_00P *size_Mat]; - D.f[DIR_00M ] = &DD[DIR_00M *size_Mat]; - D.f[DIR_PP0 ] = &DD[DIR_PP0 *size_Mat]; - D.f[DIR_MM0 ] = &DD[DIR_MM0 *size_Mat]; - D.f[DIR_PM0 ] = &DD[DIR_PM0 *size_Mat]; - D.f[DIR_MP0 ] = &DD[DIR_MP0 *size_Mat]; - D.f[DIR_P0P ] = &DD[DIR_P0P *size_Mat]; - D.f[DIR_M0M ] = &DD[DIR_M0M *size_Mat]; - D.f[DIR_P0M ] = &DD[DIR_P0M *size_Mat]; - D.f[DIR_M0P ] = &DD[DIR_M0P *size_Mat]; - D.f[DIR_0PP ] = &DD[DIR_0PP *size_Mat]; - D.f[DIR_0MM ] = &DD[DIR_0MM *size_Mat]; - D.f[DIR_0PM ] = &DD[DIR_0PM *size_Mat]; - D.f[DIR_0MP ] = &DD[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DD[DIR_000*size_Mat]; - D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat]; - D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat]; - D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat]; - D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat]; - D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat]; - D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat]; - D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat]; - D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat]; - } - else - { - D.f[DIR_M00 ] = &DD[DIR_P00 *size_Mat]; - D.f[DIR_P00 ] = &DD[DIR_M00 *size_Mat]; - D.f[DIR_0M0 ] = &DD[DIR_0P0 *size_Mat]; - D.f[DIR_0P0 ] = &DD[DIR_0M0 *size_Mat]; - D.f[DIR_00M ] = &DD[DIR_00P *size_Mat]; - D.f[DIR_00P ] = &DD[DIR_00M *size_Mat]; - D.f[DIR_MM0 ] = &DD[DIR_PP0 *size_Mat]; - D.f[DIR_PP0 ] = &DD[DIR_MM0 *size_Mat]; - D.f[DIR_MP0 ] = &DD[DIR_PM0 *size_Mat]; - D.f[DIR_PM0 ] = &DD[DIR_MP0 *size_Mat]; - D.f[DIR_M0M ] = &DD[DIR_P0P *size_Mat]; - D.f[DIR_P0P ] = &DD[DIR_M0M *size_Mat]; - 
D.f[DIR_M0P ] = &DD[DIR_P0M *size_Mat]; - D.f[DIR_P0M ] = &DD[DIR_M0P *size_Mat]; - D.f[DIR_0MM ] = &DD[DIR_0PP *size_Mat]; - D.f[DIR_0PP ] = &DD[DIR_0MM *size_Mat]; - D.f[DIR_0MP ] = &DD[DIR_0PM *size_Mat]; - D.f[DIR_0PM ] = &DD[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DD[DIR_000*size_Mat]; - D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat]; - D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat]; - D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat]; - D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat]; - D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat]; - D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat]; - D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat]; - D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat]; - } - ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - real f1_E = (D.f[DIR_P00 ])[k1e ]; - real f1_W = (D.f[DIR_M00 ])[k1w ]; - real f1_N = (D.f[DIR_0P0 ])[k1n ]; - real f1_S = (D.f[DIR_0M0 ])[k1s ]; - real f1_T = (D.f[DIR_00P ])[k1t ]; - real f1_B = (D.f[DIR_00M ])[k1b ]; - real f1_NE = (D.f[DIR_PP0 ])[k1ne ]; - real f1_SW = (D.f[DIR_MM0 ])[k1sw ]; - real f1_SE = (D.f[DIR_PM0 ])[k1se ]; - real f1_NW = (D.f[DIR_MP0 ])[k1nw ]; - real f1_TE = (D.f[DIR_P0P ])[k1te ]; - real f1_BW = (D.f[DIR_M0M ])[k1bw ]; - real f1_BE = (D.f[DIR_P0M ])[k1be ]; - real f1_TW = (D.f[DIR_M0P ])[k1tw ]; - real f1_TN = (D.f[DIR_0PP ])[k1tn ]; - real f1_BS = (D.f[DIR_0MM ])[k1bs ]; - real f1_BN = (D.f[DIR_0PM ])[k1bn ]; - real f1_TS = (D.f[DIR_0MP ])[k1ts ]; - //real f1_ZERO = (D.f[DIR_000])[k1zero]; - real f1_TNE = (D.f[DIR_PPP ])[k1tne ]; - real f1_TSW = (D.f[DIR_MMP ])[k1tsw ]; - real f1_TSE = (D.f[DIR_PMP ])[k1tse ]; - real f1_TNW = (D.f[DIR_MPP ])[k1tnw ]; - real f1_BNE = (D.f[DIR_PPM ])[k1bne ]; - real f1_BSW = (D.f[DIR_MMM ])[k1bsw ]; - real f1_BSE = (D.f[DIR_PMM ])[k1bse ]; - real f1_BNW = (D.f[DIR_MPM ])[k1bnw ]; - ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - 
real f_E = (D.f[DIR_P00 ])[ke ]; - real f_W = (D.f[DIR_M00 ])[kw ]; - real f_N = (D.f[DIR_0P0 ])[kn ]; - real f_S = (D.f[DIR_0M0 ])[ks ]; - real f_T = (D.f[DIR_00P ])[kt ]; - real f_B = (D.f[DIR_00M ])[kb ]; - real f_NE = (D.f[DIR_PP0 ])[kne ]; - real f_SW = (D.f[DIR_MM0 ])[ksw ]; - real f_SE = (D.f[DIR_PM0 ])[kse ]; - real f_NW = (D.f[DIR_MP0 ])[knw ]; - real f_TE = (D.f[DIR_P0P ])[kte ]; - real f_BW = (D.f[DIR_M0M ])[kbw ]; - real f_BE = (D.f[DIR_P0M ])[kbe ]; - real f_TW = (D.f[DIR_M0P ])[ktw ]; - real f_TN = (D.f[DIR_0PP ])[ktn ]; - real f_BS = (D.f[DIR_0MM ])[kbs ]; - real f_BN = (D.f[DIR_0PM ])[kbn ]; - real f_TS = (D.f[DIR_0MP ])[kts ]; - //real f_ZERO = (D.f[DIR_000])[kzero]; - real f_TNE = (D.f[DIR_PPP ])[ktne ]; - real f_TSW = (D.f[DIR_MMP ])[ktsw ]; - real f_TSE = (D.f[DIR_PMP ])[ktse ]; - real f_TNW = (D.f[DIR_MPP ])[ktnw ]; - real f_BNE = (D.f[DIR_PPM ])[kbne ]; - real f_BSW = (D.f[DIR_MMM ])[kbsw ]; - real f_BSE = (D.f[DIR_PMM ])[kbse ]; - real f_BNW = (D.f[DIR_MPM ])[kbnw ]; - ////////////////////////////////////////////////////////////////////////// + if(k>=numberOfBCnodes) return; - //real vx1, vx2, vx3, drho; - //real vx1, vx2, vx3, drho, drho1; - ////////////////////////////////////////////////////////////////////////// - //Dichte - // drho1 = f1_TSE + f1_TNW + f1_TNE + f1_TSW + f1_BSE + f1_BNW + f1_BNE + f1_BSW + - // f1_BN + f1_TS + f1_TN + f1_BS + f1_BE + f1_TW + f1_TE + f1_BW + f1_SE + f1_NW + f1_NE + f1_SW + - // f1_T + f1_B + f1_N + f1_S + f1_E + f1_W + ((D.f[DIR_000])[k1zero]); - // drho = f_TSE + f_TNW + f_TNE + f_TSW + f_BSE + f_BNW + f_BNE + f_BSW + - // f_BN + f_TS + f_TN + f_BS + f_BE + f_TW + f_TE + f_BW + f_SE + f_NW + f_NE + f_SW + - // f_T + f_B + f_N + f_S + f_E + f_W + ((D.f[DIR_000])[kzero]); - - ////////////////////////////////////////////////////////////////////////// - //Ux + //////////////////////////////////////////////////////////////////////////////// + //index + unsigned int KQK = k_Q[k]; + // unsigned int kzero= KQK; + 
unsigned int ke = KQK; + unsigned int kw = neighborX[KQK]; + unsigned int kn = KQK; + unsigned int ks = neighborY[KQK]; + unsigned int kt = KQK; + unsigned int kb = neighborZ[KQK]; + unsigned int ksw = neighborY[kw]; + unsigned int kne = KQK; + unsigned int kse = ks; + unsigned int knw = kw; + unsigned int kbw = neighborZ[kw]; + unsigned int kte = KQK; + unsigned int kbe = kb; + unsigned int ktw = kw; + unsigned int kbs = neighborZ[ks]; + unsigned int ktn = KQK; + unsigned int kbn = kb; + unsigned int kts = ks; + unsigned int ktse = ks; + unsigned int kbnw = kbw; + unsigned int ktnw = kw; + unsigned int kbse = kbs; + unsigned int ktsw = ksw; + unsigned int kbne = kb; + unsigned int ktne = KQK; + unsigned int kbsw = neighborZ[ksw]; + //////////////////////////////////////////////////////////////////////////////// + //index1 + unsigned int K1QK = k_N[k]; + //unsigned int k1zero= K1QK; + unsigned int k1e = K1QK; + unsigned int k1w = neighborX[K1QK]; + unsigned int k1n = K1QK; + unsigned int k1s = neighborY[K1QK]; + unsigned int k1t = K1QK; + unsigned int k1b = neighborZ[K1QK]; + unsigned int k1sw = neighborY[k1w]; + unsigned int k1ne = K1QK; + unsigned int k1se = k1s; + unsigned int k1nw = k1w; + unsigned int k1bw = neighborZ[k1w]; + unsigned int k1te = K1QK; + unsigned int k1be = k1b; + unsigned int k1tw = k1w; + unsigned int k1bs = neighborZ[k1s]; + unsigned int k1tn = K1QK; + unsigned int k1bn = k1b; + unsigned int k1ts = k1s; + unsigned int k1tse = k1s; + unsigned int k1bnw = k1bw; + unsigned int k1tnw = k1w; + unsigned int k1bse = k1bs; + unsigned int k1tsw = k1sw; + unsigned int k1bne = k1b; + unsigned int k1tne = K1QK; + unsigned int k1bsw = neighborZ[k1sw]; + //////////////////////////////////////////////////////////////////////////////// + Distributions27 dist; + getPointersToDistributions(dist, distributions, numberOfLBnodes, isEvenTimestep); + real f[27], f1[27]; + 
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + f1[DIR_P00] = (dist.f[DIR_P00])[k1e ]; + f1[DIR_M00] = (dist.f[DIR_M00])[k1w ]; + f1[DIR_0P0] = (dist.f[DIR_0P0])[k1n ]; + f1[DIR_0M0] = (dist.f[DIR_0M0])[k1s ]; + f1[DIR_00P] = (dist.f[DIR_00P])[k1t ]; + f1[DIR_00M] = (dist.f[DIR_00M])[k1b ]; + f1[DIR_PP0] = (dist.f[DIR_PP0])[k1ne ]; + f1[DIR_MM0] = (dist.f[DIR_MM0])[k1sw ]; + f1[DIR_PM0] = (dist.f[DIR_PM0])[k1se ]; + f1[DIR_MP0] = (dist.f[DIR_MP0])[k1nw ]; + f1[DIR_P0P] = (dist.f[DIR_P0P])[k1te ]; + f1[DIR_M0M] = (dist.f[DIR_M0M])[k1bw ]; + f1[DIR_P0M] = (dist.f[DIR_P0M])[k1be ]; + f1[DIR_M0P] = (dist.f[DIR_M0P])[k1tw ]; + f1[DIR_0PP] = (dist.f[DIR_0PP])[k1tn ]; + f1[DIR_0MM] = (dist.f[DIR_0MM])[k1bs ]; + f1[DIR_0PM] = (dist.f[DIR_0PM])[k1bn ]; + f1[DIR_0MP] = (dist.f[DIR_0MP])[k1ts ]; + // f1[DIR_000] = (dist.f[DIR_000])[k1zero]; + f1[DIR_PPP] = (dist.f[DIR_PPP])[k1tne ]; + f1[DIR_MMP] = (dist.f[DIR_MMP])[k1tsw ]; + f1[DIR_PMP] = (dist.f[DIR_PMP])[k1tse ]; + f1[DIR_MPP] = (dist.f[DIR_MPP])[k1tnw ]; + f1[DIR_PPM] = (dist.f[DIR_PPM])[k1bne ]; + f1[DIR_MMM] = (dist.f[DIR_MMM])[k1bsw ]; + f1[DIR_PMM] = (dist.f[DIR_PMM])[k1bse ]; + f1[DIR_MPM] = (dist.f[DIR_MPM])[k1bnw ]; + ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + f[DIR_P00] = (dist.f[DIR_P00])[ke ]; + f[DIR_M00] = (dist.f[DIR_M00])[kw ]; + f[DIR_0P0] = (dist.f[DIR_0P0])[kn ]; + f[DIR_0M0] = (dist.f[DIR_0M0])[ks ]; + f[DIR_00P] = (dist.f[DIR_00P])[kt ]; + f[DIR_00M] = (dist.f[DIR_00M])[kb ]; + f[DIR_PP0] = (dist.f[DIR_PP0])[kne ]; + f[DIR_MM0] = (dist.f[DIR_MM0])[ksw ]; + f[DIR_PM0] = (dist.f[DIR_PM0])[kse ]; + f[DIR_MP0] = (dist.f[DIR_MP0])[knw ]; + f[DIR_P0P] = (dist.f[DIR_P0P])[kte ]; + f[DIR_M0M] = (dist.f[DIR_M0M])[kbw ]; + f[DIR_P0M] = (dist.f[DIR_P0M])[kbe ]; + f[DIR_M0P] = (dist.f[DIR_M0P])[ktw ]; + 
f[DIR_0PP] = (dist.f[DIR_0PP])[ktn ]; + f[DIR_0MM] = (dist.f[DIR_0MM])[kbs ]; + f[DIR_0PM] = (dist.f[DIR_0PM])[kbn ]; + f[DIR_0MP] = (dist.f[DIR_0MP])[kts ]; + // f[DIR_000] = (dist.f[DIR_000])[kzero]; + f[DIR_PPP] = (dist.f[DIR_PPP])[ktne ]; + f[DIR_MMP] = (dist.f[DIR_MMP])[ktsw ]; + f[DIR_PMP] = (dist.f[DIR_PMP])[ktse ]; + f[DIR_MPP] = (dist.f[DIR_MPP])[ktnw ]; + f[DIR_PPM] = (dist.f[DIR_PPM])[kbne ]; + f[DIR_MMM] = (dist.f[DIR_MMM])[kbsw ]; + f[DIR_PMM] = (dist.f[DIR_PMM])[kbse ]; + f[DIR_MPM] = (dist.f[DIR_MPM])[kbnw ]; + ////////////////////////////////////////////////////////////////////////// - //vx1 = (((f_TSE - f_BNW) - (f_TNW - f_BSE)) + ((f_TNE - f_BSW) - (f_TSW - f_BNE)) + + //real vx1, vx2, vx3, drho; + //real vx1, vx2, vx3, drho, drho1; + ////////////////////////////////////////////////////////////////////////// + ////Dichte + // drho1 = f1_TSE + f1_TNW + f1_TNE + f1_TSW + f1_BSE + f1_BNW + f1_BNE + f1_BSW + + // f1_BN + f1_TS + f1_TN + f1_BS + f1_BE + f1_TW + f1_TE + f1_BW + f1_SE + f1_NW + f1_NE + f1_SW + + // f1_T + f1_B + f1_N + f1_S + f1_E + f1_W + ((D.f[DIR_000])[k1zero]); + // drho = f_TSE + f_TNW + f_TNE + f_TSW + f_BSE + f_BNW + f_BNE + f_BSW + + // f_BN + f_TS + f_TN + f_BS + f_BE + f_TW + f_TE + f_BW + f_SE + f_NW + f_NE + f_SW + + // f_T + f_B + f_N + f_S + f_E + f_W + ((D.f[DIR_000])[kzero]); + + ////////////////////////////////////////////////////////////////////////// + ////Ux + + //vx1 = (((f_TSE - f_BNW) - (f_TNW - f_BSE)) + ((f_TNE - f_BSW) - (f_TSW - f_BNE)) + // ((f_BE - f_TW) + (f_TE - f_BW)) + ((f_SE - f_NW) + (f_NE - f_SW)) + // (f_E - f_W)) /(one + drho); - // vx2 = ((-(f_TSE - f_BNW) + (f_TNW - f_BSE)) + ((f_TNE - f_BSW) - (f_TSW - f_BNE)) + + //vx2 = ((-(f_TSE - f_BNW) + (f_TNW - f_BSE)) + ((f_TNE - f_BSW) - (f_TSW - f_BNE)) + // ((f_BN - f_TS) + (f_TN - f_BS)) + (-(f_SE - f_NW) + (f_NE - f_SW)) + // (f_N - f_S)) /(one + drho); - // vx3 = (((f_TSE - f_BNW) + (f_TNW - f_BSE)) + ((f_TNE - f_BSW) + (f_TSW - f_BNE)) + + //vx3 = 
(((f_TSE - f_BNW) + (f_TNW - f_BSE)) + ((f_TNE - f_BSW) + (f_TSW - f_BNE)) + // (-(f_BN - f_TS) + (f_TN - f_BS)) + ((f_TE - f_BW) - (f_BE - f_TW)) + // (f_T - f_B)) /(one + drho); - //real cu_sq=c3o2*(vx1*vx1+vx2*vx2+vx3*vx3); + //real cu_sq=c3o2*(vx1*vx1+vx2*vx2+vx3*vx3); - // ////////////////////////////////////////////////////////////////////////// - ////real omega = om1; + ////////////////////////////////////////////////////////////////////////// + ////real omega = om1; // real cusq = c3o2*(vx1*vx1+vx2*vx2+vx3*vx3); // ////////////////////////////////////////////////////////////////////////// - ////T�st MK - ////if(vx1 < zero) vx1 = zero; + ////T�st MK + ////if(vx1 < zero) vx1 = zero; // ////////////////////////////////////////////////////////////////////////// // real fZERO = c8over27* (drho1-(one + drho1)*(cusq)) ; // real fE = c2over27* (drho1+(one + drho1)*(three*( vx1 )+c9over2*( vx1 )*( vx1 )-cusq)); @@ -3050,10 +2993,75 @@ __global__ void QPressNoRhoDevice27( real* rhoBC, // real fSW = c1over54* (drho1+(one + drho1)*(three*(-vx1-vx2 )+c9over2*(-vx1-vx2 )*(-vx1-vx2 )-cusq)); // real fSE = c1over54* (drho1+(one + drho1)*(three*( vx1-vx2 )+c9over2*( vx1-vx2 )*( vx1-vx2 )-cusq)); // real fNW = c1over54* (drho1+(one + drho1)*(three*(-vx1+vx2 )+c9over2*(-vx1+vx2 )*(-vx1+vx2 )-cusq)); - // real fTE = c1over54* (drho1+(one + drho1)*(three*( vx1 +vx3)+c9over2*( vx1 +vx3)*( vx1 +vx3)-cusq)); - // real fBW = c1over54* (drho1+(one + drho1)*(three*(-vx1 -vx3)+c9over2*(-vx1 -vx3)*(-vx1 -vx3)-cusq)); - // real fBE = c1over54* (drho1+(one + drho1)*(three*( vx1 -vx3)+c9over2*( vx1 -vx3)*( vx1 -vx3)-cusq)); - // real fTW = c1over54* (drho1+(one + drho1)*(three*(-vx1 +vx3)+c9over2*(-vx1 +vx3)*(-vx1 +vx3)-cusq)); + // real fTE ///////////////////////////////////////////////////////////// + //with velocity + //if(true){//vx1 >= zero){ + // real csMvx = one / sqrtf(three) - vx1; + // //real csMvy = one / sqrtf(three) - vx2; + // /////////////////////////////////////////// + 
// // X + // f_W = f1_W * csMvx + (one - csMvx) * f_W ;//- c2over27 * ((drho + drho1)*c1o2-((drho + drho1)*c1o2 )*three*vx1); + // f_NW = f1_NW * csMvx + (one - csMvx) * f_NW ;//- c1over54 * ((drho + drho1)*c1o2-((drho + drho1)*c1o2 )*three*vx1); + // f_SW = f1_SW * csMvx + (one - csMvx) * f_SW ;//- c1over54 * ((drho + drho1)*c1o2-((drho + drho1)*c1o2 )*three*vx1); + // f_TW = f1_TW * csMvx + (one - csMvx) * f_TW ;//- c1over54 * ((drho + drho1)*c1o2-((drho + drho1)*c1o2 )*three*vx1); + // f_BW = f1_BW * csMvx + (one - csMvx) * f_BW ;//- c1over54 * ((drho + drho1)*c1o2-((drho + drho1)*c1o2 )*three*vx1); + // f_TNW = f1_TNW * csMvx + (one - csMvx) * f_TNW ;//- c1over216 * ((drho + drho1)*c1o2-((drho + drho1)*c1o2 )*three*vx1); + // f_TSW = f1_TSW * csMvx + (one - csMvx) * f_TSW ;//- c1over216 * ((drho + drho1)*c1o2-((drho + drho1)*c1o2 )*three*vx1); + // f_BNW = f1_BNW * csMvx + (one - csMvx) * f_BNW ;//- c1over216 * ((drho + drho1)*c1o2-((drho + drho1)*c1o2 )*three*vx1); + // f_BSW = f1_BSW * csMvx + (one - csMvx) * f_BSW ;//- c1over216 * ((drho + drho1)*c1o2-((drho + drho1)*c1o2 )*three*vx1); + // /////////////////////////////////////////// + // // Y + // //f_S = f1_S * csMvy + (one - csMvy) * f_S ;//- c2over27 * ((drho + drho1)*c1o2-((drho + drho1)*c1o2 )*three*vx2); + // //f_SE = f1_SE * csMvy + (one - csMvy) * f_SE ;//- c1over54 * ((drho + drho1)*c1o2-((drho + drho1)*c1o2 )*three*vx2); + // //f_SW = f1_SW * csMvy + (one - csMvy) * f_SW ;//- c1over54 * ((drho + drho1)*c1o2-((drho + drho1)*c1o2 )*three*vx2); + // //f_TS = f1_TS * csMvy + (one - csMvy) * f_TS ;//- c1over54 * ((drho + drho1)*c1o2-((drho + drho1)*c1o2 )*three*vx2); + // //f_BS = f1_BS * csMvy + (one - csMvy) * f_BS ;//- c1over54 * ((drho + drho1)*c1o2-((drho + drho1)*c1o2 )*three*vx2); + // //f_TSE = f1_TSE * csMvy + (one - csMvy) * f_TSE ;//- c1over216 * ((drho + drho1)*c1o2-((drho + drho1)*c1o2 )*three*vx2); + // //f_TSW = f1_TSW * csMvy + (one - csMvy) * f_TSW ;//- c1over216 * ((drho + 
drho1)*c1o2-((drho + drho1)*c1o2 )*three*vx2); + // //f_BSE = f1_BSE * csMvy + (one - csMvy) * f_BSE ;//- c1over216 * ((drho + drho1)*c1o2-((drho + drho1)*c1o2 )*three*vx2); + // //f_BSW = f1_BSW * csMvy + (one - csMvy) * f_BSW ;//- c1over216 * ((drho + drho1)*c1o2-((drho + drho1)*c1o2 )*three*vx2); + // //f_S = f1_S * csMvy + (one - csMvy) * f_S; + // //f_SE = f1_SE * csMvy + (one - csMvy) * f_SE; + // //f_SW = f1_SW * csMvy + (one - csMvy) * f_SW; + // //f_TS = f1_TS * csMvy + (one - csMvy) * f_TS; + // //f_BS = f1_BS * csMvy + (one - csMvy) * f_BS; + // //f_TSE = f1_TSE * csMvy + (one - csMvy) * f_TSE; + // //f_TSW = f1_TSW * csMvy + (one - csMvy) * f_TSW; + // //f_BSE = f1_BSE * csMvy + (one - csMvy) * f_BSE; + // //f_BSW = f1_BSW * csMvy + (one - csMvy) * f_BSW; + // ////////////////////////////////////////////////////////////////////////// + //} + //else + //{ + // /////////////////////////////////////////// + // // X + // vx1 = vx1 * 0.9; + // f_W = f_E - six * c2over27 * ( vx1 ); + // f_NW = f_SE - six * c1over54 * ( vx1-vx2 ); + // f_SW = f_NE - six * c1over54 * ( vx1+vx2 ); + // f_TW = f_BE - six * c1over54 * ( vx1 -vx3); + // f_BW = f_TE - six * c1over54 * ( vx1 +vx3); + // f_TNW = f_BSE - six * c1over216 * ( vx1-vx2-vx3); + // f_TSW = f_BNE - six * c1over216 * ( vx1+vx2-vx3); + // f_BNW = f_TSE - six * c1over216 * ( vx1-vx2+vx3); + // f_BSW = f_TNE - six * c1over216 * ( vx1+vx2+vx3); + // /////////////////////////////////////////// + // // Y + // //vx2 = vx2 * 0.9; + // //f_S = f_N - six * c2over27 * ( vx2 ); + // //f_SE = f_NW - six * c1over54 * (-vx1+vx2 ); + // //f_SW = f_NE - six * c1over54 * ( vx1+vx2 ); + // //f_TS = f_BN - six * c1over54 * ( vx2-vx3); + // //f_BS = f_TN - six * c1over54 * ( vx2+vx3); + // //f_TSE = f_BNW - six * c1over216 * (-vx1+vx2-vx3); + // //f_TSW = f_BNE - six * c1over216 * ( vx1+vx2-vx3); + // //f_BSE = f_TNW - six * c1over216 * (-vx1+vx2+vx3); + // //f_BSW = f_TNE - six * c1over216 * ( vx1+vx2+vx3); + // 
/////////////////////////////////////////// + //} + ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + + // = c1over54* (drho1+(one + drho1)*(three*(-vx1 +vx3)+c9over2*(-vx1 +vx3)*(-vx1 +vx3)-cusq)); // real fTN = c1over54* (drho1+(one + drho1)*(three*( vx2+vx3)+c9over2*( vx2+vx3)*( vx2+vx3)-cusq)); // real fBS = c1over54* (drho1+(one + drho1)*(three*( -vx2-vx3)+c9over2*( -vx2-vx3)*( -vx2-vx3)-cusq)); // real fBN = c1over54* (drho1+(one + drho1)*(three*( vx2-vx3)+c9over2*( vx2-vx3)*( vx2-vx3)-cusq)); @@ -3067,222 +3075,322 @@ __global__ void QPressNoRhoDevice27( real* rhoBC, // real fBSE = c1over216* (drho1+(one + drho1)*(three*( vx1-vx2-vx3)+c9over2*( vx1-vx2-vx3)*( vx1-vx2-vx3)-cusq)); // real fTNW = c1over216* (drho1+(one + drho1)*(three*(-vx1+vx2+vx3)+c9over2*(-vx1+vx2+vx3)*(-vx1+vx2+vx3)-cusq)); - real cs = c1o1 / sqrtf(c3o1); - ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - //no velocity - ////////////////////////////////////////// - f_E = f1_E * cs + (c1o1 - cs) * f_E ; - f_W = f1_W * cs + (c1o1 - cs) * f_W ; - f_N = f1_N * cs + (c1o1 - cs) * f_N ; - f_S = f1_S * cs + (c1o1 - cs) * f_S ; - f_T = f1_T * cs + (c1o1 - cs) * f_T ; - f_B = f1_B * cs + (c1o1 - cs) * f_B ; - f_NE = f1_NE * cs + (c1o1 - cs) * f_NE ; - f_SW = f1_SW * cs + (c1o1 - cs) * f_SW ; - f_SE = f1_SE * cs + (c1o1 - cs) * f_SE ; - f_NW = f1_NW * cs + (c1o1 - cs) * f_NW ; - f_TE = f1_TE * cs + (c1o1 - cs) * f_TE ; - f_BW = f1_BW * cs + (c1o1 - cs) * f_BW ; - f_BE = f1_BE * cs + (c1o1 - cs) * f_BE ; - f_TW = f1_TW * cs + (c1o1 - cs) * f_TW ; - f_TN = f1_TN * cs + (c1o1 - cs) * f_TN ; - f_BS = f1_BS * cs + (c1o1 - cs) * f_BS ; - f_BN = f1_BN * cs + (c1o1 - cs) * f_BN ; - f_TS = f1_TS * cs + (c1o1 - cs) * f_TS ; - f_TNE = f1_TNE * cs + (c1o1 - cs) * f_TNE ; - f_TSW = f1_TSW * cs + (c1o1 - cs) * f_TSW ; - f_TSE = f1_TSE * cs + (c1o1 - 
cs) * f_TSE ; - f_TNW = f1_TNW * cs + (c1o1 - cs) * f_TNW ; - f_BNE = f1_BNE * cs + (c1o1 - cs) * f_BNE ; - f_BSW = f1_BSW * cs + (c1o1 - cs) * f_BSW ; - f_BSE = f1_BSE * cs + (c1o1 - cs) * f_BSE ; - f_BNW = f1_BNW * cs + (c1o1 - cs) * f_BNW ; - ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - - ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - //with velocity - //if(true){//vx1 >= zero){ - // real csMvx = one / sqrtf(three) - vx1; - // //real csMvy = one / sqrtf(three) - vx2; - // /////////////////////////////////////////// - // // X - // f_W = f1_W * csMvx + (one - csMvx) * f_W ;//- c2over27 * ((drho + drho1)*c1o2-((drho + drho1)*c1o2 )*three*vx1); - // f_NW = f1_NW * csMvx + (one - csMvx) * f_NW ;//- c1over54 * ((drho + drho1)*c1o2-((drho + drho1)*c1o2 )*three*vx1); - // f_SW = f1_SW * csMvx + (one - csMvx) * f_SW ;//- c1over54 * ((drho + drho1)*c1o2-((drho + drho1)*c1o2 )*three*vx1); - // f_TW = f1_TW * csMvx + (one - csMvx) * f_TW ;//- c1over54 * ((drho + drho1)*c1o2-((drho + drho1)*c1o2 )*three*vx1); - // f_BW = f1_BW * csMvx + (one - csMvx) * f_BW ;//- c1over54 * ((drho + drho1)*c1o2-((drho + drho1)*c1o2 )*three*vx1); - // f_TNW = f1_TNW * csMvx + (one - csMvx) * f_TNW ;//- c1over216 * ((drho + drho1)*c1o2-((drho + drho1)*c1o2 )*three*vx1); - // f_TSW = f1_TSW * csMvx + (one - csMvx) * f_TSW ;//- c1over216 * ((drho + drho1)*c1o2-((drho + drho1)*c1o2 )*three*vx1); - // f_BNW = f1_BNW * csMvx + (one - csMvx) * f_BNW ;//- c1over216 * ((drho + drho1)*c1o2-((drho + drho1)*c1o2 )*three*vx1); - // f_BSW = f1_BSW * csMvx + (one - csMvx) * f_BSW ;//- c1over216 * ((drho + drho1)*c1o2-((drho + drho1)*c1o2 )*three*vx1); - // /////////////////////////////////////////// - // // Y - // //f_S = f1_S * csMvy + (one - csMvy) * f_S ;//- c2over27 * ((drho + drho1)*c1o2-((drho + drho1)*c1o2 )*three*vx2); - // //f_SE 
= f1_SE * csMvy + (one - csMvy) * f_SE ;//- c1over54 * ((drho + drho1)*c1o2-((drho + drho1)*c1o2 )*three*vx2); - // //f_SW = f1_SW * csMvy + (one - csMvy) * f_SW ;//- c1over54 * ((drho + drho1)*c1o2-((drho + drho1)*c1o2 )*three*vx2); - // //f_TS = f1_TS * csMvy + (one - csMvy) * f_TS ;//- c1over54 * ((drho + drho1)*c1o2-((drho + drho1)*c1o2 )*three*vx2); - // //f_BS = f1_BS * csMvy + (one - csMvy) * f_BS ;//- c1over54 * ((drho + drho1)*c1o2-((drho + drho1)*c1o2 )*three*vx2); - // //f_TSE = f1_TSE * csMvy + (one - csMvy) * f_TSE ;//- c1over216 * ((drho + drho1)*c1o2-((drho + drho1)*c1o2 )*three*vx2); - // //f_TSW = f1_TSW * csMvy + (one - csMvy) * f_TSW ;//- c1over216 * ((drho + drho1)*c1o2-((drho + drho1)*c1o2 )*three*vx2); - // //f_BSE = f1_BSE * csMvy + (one - csMvy) * f_BSE ;//- c1over216 * ((drho + drho1)*c1o2-((drho + drho1)*c1o2 )*three*vx2); - // //f_BSW = f1_BSW * csMvy + (one - csMvy) * f_BSW ;//- c1over216 * ((drho + drho1)*c1o2-((drho + drho1)*c1o2 )*three*vx2); - // //f_S = f1_S * csMvy + (one - csMvy) * f_S; - // //f_SE = f1_SE * csMvy + (one - csMvy) * f_SE; - // //f_SW = f1_SW * csMvy + (one - csMvy) * f_SW; - // //f_TS = f1_TS * csMvy + (one - csMvy) * f_TS; - // //f_BS = f1_BS * csMvy + (one - csMvy) * f_BS; - // //f_TSE = f1_TSE * csMvy + (one - csMvy) * f_TSE; - // //f_TSW = f1_TSW * csMvy + (one - csMvy) * f_TSW; - // //f_BSE = f1_BSE * csMvy + (one - csMvy) * f_BSE; - // //f_BSW = f1_BSW * csMvy + (one - csMvy) * f_BSW; - // ////////////////////////////////////////////////////////////////////////// - //} - //else - //{ - // /////////////////////////////////////////// - // // X - // vx1 = vx1 * 0.9; - // f_W = f_E - six * c2over27 * ( vx1 ); - // f_NW = f_SE - six * c1over54 * ( vx1-vx2 ); - // f_SW = f_NE - six * c1over54 * ( vx1+vx2 ); - // f_TW = f_BE - six * c1over54 * ( vx1 -vx3); - // f_BW = f_TE - six * c1over54 * ( vx1 +vx3); - // f_TNW = f_BSE - six * c1over216 * ( vx1-vx2-vx3); - // f_TSW = f_BNE - six * c1over216 * ( vx1+vx2-vx3); - 
// f_BNW = f_TSE - six * c1over216 * ( vx1-vx2+vx3); - // f_BSW = f_TNE - six * c1over216 * ( vx1+vx2+vx3); - // /////////////////////////////////////////// - // // Y - // //vx2 = vx2 * 0.9; - // //f_S = f_N - six * c2over27 * ( vx2 ); - // //f_SE = f_NW - six * c1over54 * (-vx1+vx2 ); - // //f_SW = f_NE - six * c1over54 * ( vx1+vx2 ); - // //f_TS = f_BN - six * c1over54 * ( vx2-vx3); - // //f_BS = f_TN - six * c1over54 * ( vx2+vx3); - // //f_TSE = f_BNW - six * c1over216 * (-vx1+vx2-vx3); - // //f_TSW = f_BNE - six * c1over216 * ( vx1+vx2-vx3); - // //f_BSE = f_TNW - six * c1over216 * (-vx1+vx2+vx3); - // //f_BSW = f_TNE - six * c1over216 * ( vx1+vx2+vx3); - // /////////////////////////////////////////// - //} - ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + real cs = c1o1 / sqrtf(c3o1); - ////////////////////////////////////////////////////////////////////////// - if (isEvenTimestep==false) - { - D.f[DIR_P00 ] = &DD[DIR_P00 *size_Mat]; - D.f[DIR_M00 ] = &DD[DIR_M00 *size_Mat]; - D.f[DIR_0P0 ] = &DD[DIR_0P0 *size_Mat]; - D.f[DIR_0M0 ] = &DD[DIR_0M0 *size_Mat]; - D.f[DIR_00P ] = &DD[DIR_00P *size_Mat]; - D.f[DIR_00M ] = &DD[DIR_00M *size_Mat]; - D.f[DIR_PP0 ] = &DD[DIR_PP0 *size_Mat]; - D.f[DIR_MM0 ] = &DD[DIR_MM0 *size_Mat]; - D.f[DIR_PM0 ] = &DD[DIR_PM0 *size_Mat]; - D.f[DIR_MP0 ] = &DD[DIR_MP0 *size_Mat]; - D.f[DIR_P0P ] = &DD[DIR_P0P *size_Mat]; - D.f[DIR_M0M ] = &DD[DIR_M0M *size_Mat]; - D.f[DIR_P0M ] = &DD[DIR_P0M *size_Mat]; - D.f[DIR_M0P ] = &DD[DIR_M0P *size_Mat]; - D.f[DIR_0PP ] = &DD[DIR_0PP *size_Mat]; - D.f[DIR_0MM ] = &DD[DIR_0MM *size_Mat]; - D.f[DIR_0PM ] = &DD[DIR_0PM *size_Mat]; - D.f[DIR_0MP ] = &DD[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DD[DIR_000*size_Mat]; - D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat]; - D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat]; - D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat]; - D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat]; - D.f[DIR_PPM ] = &DD[DIR_PPM 
*size_Mat]; - D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat]; - D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat]; - D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat]; - } - else - { - D.f[DIR_M00 ] = &DD[DIR_P00 *size_Mat]; - D.f[DIR_P00 ] = &DD[DIR_M00 *size_Mat]; - D.f[DIR_0M0 ] = &DD[DIR_0P0 *size_Mat]; - D.f[DIR_0P0 ] = &DD[DIR_0M0 *size_Mat]; - D.f[DIR_00M ] = &DD[DIR_00P *size_Mat]; - D.f[DIR_00P ] = &DD[DIR_00M *size_Mat]; - D.f[DIR_MM0 ] = &DD[DIR_PP0 *size_Mat]; - D.f[DIR_PP0 ] = &DD[DIR_MM0 *size_Mat]; - D.f[DIR_MP0 ] = &DD[DIR_PM0 *size_Mat]; - D.f[DIR_PM0 ] = &DD[DIR_MP0 *size_Mat]; - D.f[DIR_M0M ] = &DD[DIR_P0P *size_Mat]; - D.f[DIR_P0P ] = &DD[DIR_M0M *size_Mat]; - D.f[DIR_M0P ] = &DD[DIR_P0M *size_Mat]; - D.f[DIR_P0M ] = &DD[DIR_M0P *size_Mat]; - D.f[DIR_0MM ] = &DD[DIR_0PP *size_Mat]; - D.f[DIR_0PP ] = &DD[DIR_0MM *size_Mat]; - D.f[DIR_0MP ] = &DD[DIR_0PM *size_Mat]; - D.f[DIR_0PM ] = &DD[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DD[DIR_000*size_Mat]; - D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat]; - D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat]; - D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat]; - D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat]; - D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat]; - D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat]; - D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat]; - D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat]; - } - ////////////////////////////////////////////////////////////////////////// - //__syncthreads(); - // -X - //(D.f[DIR_P00 ])[ke ] = f_E ; - //(D.f[DIR_PM0 ])[kse ] = f_SE ; - //(D.f[DIR_PP0 ])[kne ] = f_NE ; - //(D.f[DIR_P0M ])[kbe ] = f_BE ; - //(D.f[DIR_P0P ])[kte ] = f_TE ; - //(D.f[DIR_PMP ])[ktse ] = f_TSE ; - //(D.f[DIR_PPP ])[ktne ] = f_TNE ; - //(D.f[DIR_PMM ])[kbse ] = f_BSE ; - //(D.f[DIR_PPM ])[kbne ] = f_BNE ; - // X - (D.f[DIR_M00 ])[kw ] = f_W ; - (D.f[DIR_MM0 ])[ksw ] = f_SW ; - (D.f[DIR_MP0 ])[knw ] = f_NW ; - (D.f[DIR_M0M ])[kbw ] = f_BW ; - (D.f[DIR_M0P ])[ktw ] = f_TW ; - (D.f[DIR_MMP ])[ktsw ] = f_TSW ; - (D.f[DIR_MPP ])[ktnw ] = f_TNW ; - (D.f[DIR_MMM ])[kbsw ] = f_BSW ; - 
(D.f[DIR_MPM ])[kbnw ] = f_BNW ; - // Y - //(D.f[DIR_0M0 ])[ks ] = f_S ; - //(D.f[DIR_PM0 ])[kse ] = f_SE ; - //(D.f[DIR_MM0 ])[ksw ] = f_SW ; - //(D.f[DIR_0MP ])[kts ] = f_TS ; - //(D.f[DIR_0MM ])[kbs ] = f_BS ; - //(D.f[DIR_PMP ])[ktse ] = f_TSE ; - //(D.f[DIR_MMP ])[ktsw ] = f_TSW ; - //(D.f[DIR_PMM ])[kbse ] = f_BSE ; - //(D.f[DIR_MMM ])[kbsw ] = f_BSW ; - // Z - //(D.f[DIR_00M ])[kb ] = f_B ; - //(D.f[DIR_P0M ])[kbe ] = f_BE ; - //(D.f[DIR_M0M ])[kbw ] = f_BW ; - //(D.f[DIR_0PM ])[kbn ] = f_BN ; - //(D.f[DIR_0MM ])[kbs ] = f_BS ; - //(D.f[DIR_PPM ])[kbne ] = f_BNE ; - //(D.f[DIR_MPM ])[kbnw ] = f_BNW ; - //(D.f[DIR_PMM ])[kbse ] = f_BSE ; - //(D.f[DIR_MMM ])[kbsw ] = f_BSW ; - ////////////////////////////////////////////////////////////////////////// + ////////////////////////////////////////////////////////////////////////// + getPointersToDistributions(dist, distributions, numberOfLBnodes, !isEvenTimestep); + switch(direction) + { + case MZZ: + (dist.f[DIR_P00])[ke ] = computeOutflowDistribution(f, f1, DIR_P00, cs); + (dist.f[DIR_PM0])[kse ] = computeOutflowDistribution(f, f1, DIR_PM0, cs); + (dist.f[DIR_PP0])[kne ] = computeOutflowDistribution(f, f1, DIR_PP0, cs); + (dist.f[DIR_P0M])[kbe ] = computeOutflowDistribution(f, f1, DIR_P0M, cs); + (dist.f[DIR_P0P])[kte ] = computeOutflowDistribution(f, f1, DIR_P0P, cs); + (dist.f[DIR_PMP])[ktse ] = computeOutflowDistribution(f, f1, DIR_PMP, cs); + (dist.f[DIR_PPP])[ktne ] = computeOutflowDistribution(f, f1, DIR_PPP, cs); + (dist.f[DIR_PMM])[kbse ] = computeOutflowDistribution(f, f1, DIR_PMM, cs); + (dist.f[DIR_PPM])[kbne ] = computeOutflowDistribution(f, f1, DIR_PPM, cs); + break; + + case PZZ: + (dist.f[DIR_M00])[kw ] = computeOutflowDistribution(f, f1, DIR_M00, cs); + (dist.f[DIR_MM0])[ksw ] = computeOutflowDistribution(f, f1, DIR_MM0, cs); + (dist.f[DIR_MP0])[knw ] = computeOutflowDistribution(f, f1, DIR_MP0, cs); + (dist.f[DIR_M0M])[kbw ] = computeOutflowDistribution(f, f1, DIR_M0M, cs); + 
(dist.f[DIR_M0P])[ktw ] = computeOutflowDistribution(f, f1, DIR_M0P, cs); + (dist.f[DIR_MMP])[ktsw ] = computeOutflowDistribution(f, f1, DIR_MMP, cs); + (dist.f[DIR_MPP])[ktnw ] = computeOutflowDistribution(f, f1, DIR_MPP, cs); + (dist.f[DIR_MMM])[kbsw ] = computeOutflowDistribution(f, f1, DIR_MMM, cs); + (dist.f[DIR_MPM])[kbnw ] = computeOutflowDistribution(f, f1, DIR_MPM, cs); + break; + + case ZMZ: + (dist.f[DIR_0P0])[kn ] = computeOutflowDistribution(f, f1, DIR_0P0, cs); + (dist.f[DIR_PP0])[kne ] = computeOutflowDistribution(f, f1, DIR_PP0, cs); + (dist.f[DIR_MP0])[knw ] = computeOutflowDistribution(f, f1, DIR_MP0, cs); + (dist.f[DIR_0PP])[ktn ] = computeOutflowDistribution(f, f1, DIR_0PP, cs); + (dist.f[DIR_0PM])[kbn ] = computeOutflowDistribution(f, f1, DIR_0PM, cs); + (dist.f[DIR_PPP])[ktne ] = computeOutflowDistribution(f, f1, DIR_PPP, cs); + (dist.f[DIR_MPP])[ktnw ] = computeOutflowDistribution(f, f1, DIR_MPP, cs); + (dist.f[DIR_PPM])[kbne ] = computeOutflowDistribution(f, f1, DIR_PPM, cs); + (dist.f[DIR_MPM])[kbnw ] = computeOutflowDistribution(f, f1, DIR_MPM, cs); + break; + + case ZPZ: + (dist.f[DIR_0M0])[ks ] = computeOutflowDistribution(f, f1, DIR_0M0, cs); + (dist.f[DIR_PM0])[kse ] = computeOutflowDistribution(f, f1, DIR_PM0, cs); + (dist.f[DIR_MM0])[ksw ] = computeOutflowDistribution(f, f1, DIR_MM0, cs); + (dist.f[DIR_0MP])[kts ] = computeOutflowDistribution(f, f1, DIR_0MP, cs); + (dist.f[DIR_0MM])[kbs ] = computeOutflowDistribution(f, f1, DIR_0MM, cs); + (dist.f[DIR_PMP])[ktse ] = computeOutflowDistribution(f, f1, DIR_PMP, cs); + (dist.f[DIR_MMP])[ktsw ] = computeOutflowDistribution(f, f1, DIR_MMP, cs); + (dist.f[DIR_PMM])[kbse ] = computeOutflowDistribution(f, f1, DIR_PMM, cs); + (dist.f[DIR_MMM])[kbsw ] = computeOutflowDistribution(f, f1, DIR_MMM, cs); + break; + + case ZZM: + (dist.f[DIR_00P])[kt ] = computeOutflowDistribution(f, f1, DIR_00P, cs); + (dist.f[DIR_P0P])[kte ] = computeOutflowDistribution(f, f1, DIR_P0P, cs); + (dist.f[DIR_M0P])[ktw 
] = computeOutflowDistribution(f, f1, DIR_M0P, cs); + (dist.f[DIR_0PP])[ktn ] = computeOutflowDistribution(f, f1, DIR_0PP, cs); + (dist.f[DIR_0MP])[kts ] = computeOutflowDistribution(f, f1, DIR_0MP, cs); + (dist.f[DIR_PPP])[ktne ] = computeOutflowDistribution(f, f1, DIR_PPP, cs); + (dist.f[DIR_MPP])[ktnw ] = computeOutflowDistribution(f, f1, DIR_MPP, cs); + (dist.f[DIR_PMP])[ktse ] = computeOutflowDistribution(f, f1, DIR_PMP, cs); + (dist.f[DIR_MMP])[ktsw ] = computeOutflowDistribution(f, f1, DIR_MMP, cs); + break; + + case ZZP: + (dist.f[DIR_00M])[kb ] = computeOutflowDistribution(f, f1, DIR_00M, cs); + (dist.f[DIR_P0M])[kbe ] = computeOutflowDistribution(f, f1, DIR_P0M, cs); + (dist.f[DIR_M0M])[kbw ] = computeOutflowDistribution(f, f1, DIR_M0M, cs); + (dist.f[DIR_0PM])[kbn ] = computeOutflowDistribution(f, f1, DIR_0PM, cs); + (dist.f[DIR_0MM])[kbs ] = computeOutflowDistribution(f, f1, DIR_0MM, cs); + (dist.f[DIR_PPM])[kbne ] = computeOutflowDistribution(f, f1, DIR_PPM, cs); + (dist.f[DIR_MPM])[kbnw ] = computeOutflowDistribution(f, f1, DIR_MPM, cs); + (dist.f[DIR_PMM])[kbse ] = computeOutflowDistribution(f, f1, DIR_PMM, cs); + (dist.f[DIR_MMM])[kbsw ] = computeOutflowDistribution(f, f1, DIR_MMM, cs); + break; + default: + break; } } -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - - - +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +__host__ __device__ real computeOutflowDistribution(const real* const &f, const real* const &f1, const int dir, const real rhoCorrection, const real cs, const real weight) +{ + return f1[dir ] * cs + (c1o1 - cs) * f[dir ] - weight *rhoCorrection; +} +__global__ void QPressZeroRhoOutflowDevice27( real* rhoBC, + real* distributions, + int* k_Q, + int* k_N, + int numberOfBCnodes, + real om1, + unsigned int* neighborX, + unsigned int* 
neighborY, + unsigned int* neighborZ, + unsigned int numberOfLBnodes, + bool isEvenTimestep, + int direction, + real densityCorrectionFactor) +{ + //////////////////////////////////////////////////////////////////////////////// + const unsigned k = vf::gpu::getNodeIndex(); + + ////////////////////////////////////////////////////////////////////////// + if(k>=numberOfBCnodes) return; + //////////////////////////////////////////////////////////////////////////////// + //index + unsigned int KQK = k_Q[k]; + unsigned int kzero= KQK; + unsigned int ke = KQK; + unsigned int kw = neighborX[KQK]; + unsigned int kn = KQK; + unsigned int ks = neighborY[KQK]; + unsigned int kt = KQK; + unsigned int kb = neighborZ[KQK]; + unsigned int ksw = neighborY[kw]; + unsigned int kne = KQK; + unsigned int kse = ks; + unsigned int knw = kw; + unsigned int kbw = neighborZ[kw]; + unsigned int kte = KQK; + unsigned int kbe = kb; + unsigned int ktw = kw; + unsigned int kbs = neighborZ[ks]; + unsigned int ktn = KQK; + unsigned int kbn = kb; + unsigned int kts = ks; + unsigned int ktse = ks; + unsigned int kbnw = kbw; + unsigned int ktnw = kw; + unsigned int kbse = kbs; + unsigned int ktsw = ksw; + unsigned int kbne = kb; + unsigned int ktne = KQK; + unsigned int kbsw = neighborZ[ksw]; + //////////////////////////////////////////////////////////////////////////////// + //index1 + unsigned int K1QK = k_N[k]; + // unsigned int k1zero= K1QK; + unsigned int k1e = K1QK; + unsigned int k1w = neighborX[K1QK]; + unsigned int k1n = K1QK; + unsigned int k1s = neighborY[K1QK]; + unsigned int k1t = K1QK; + unsigned int k1b = neighborZ[K1QK]; + unsigned int k1sw = neighborY[k1w]; + unsigned int k1ne = K1QK; + unsigned int k1se = k1s; + unsigned int k1nw = k1w; + unsigned int k1bw = neighborZ[k1w]; + unsigned int k1te = K1QK; + unsigned int k1be = k1b; + unsigned int k1tw = k1w; + unsigned int k1bs = neighborZ[k1s]; + unsigned int k1tn = K1QK; + unsigned int k1bn = k1b; + unsigned int k1ts = k1s; + unsigned 
int k1tse = k1s; + unsigned int k1bnw = k1bw; + unsigned int k1tnw = k1w; + unsigned int k1bse = k1bs; + unsigned int k1tsw = k1sw; + unsigned int k1bne = k1b; + unsigned int k1tne = K1QK; + unsigned int k1bsw = neighborZ[k1sw]; + //////////////////////////////////////////////////////////////////////////////// + Distributions27 dist; + getPointersToDistributions(dist, distributions, numberOfLBnodes, isEvenTimestep); + real f1[27], f[27]; + ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + f1[DIR_P00] = (dist.f[DIR_P00])[k1e ]; + f1[DIR_M00] = (dist.f[DIR_M00])[k1w ]; + f1[DIR_0P0] = (dist.f[DIR_0P0])[k1n ]; + f1[DIR_0M0] = (dist.f[DIR_0M0])[k1s ]; + f1[DIR_00P] = (dist.f[DIR_00P])[k1t ]; + f1[DIR_00M] = (dist.f[DIR_00M])[k1b ]; + f1[DIR_PP0] = (dist.f[DIR_PP0])[k1ne ]; + f1[DIR_MM0] = (dist.f[DIR_MM0])[k1sw ]; + f1[DIR_PM0] = (dist.f[DIR_PM0])[k1se ]; + f1[DIR_MP0] = (dist.f[DIR_MP0])[k1nw ]; + f1[DIR_P0P] = (dist.f[DIR_P0P])[k1te ]; + f1[DIR_M0M] = (dist.f[DIR_M0M])[k1bw ]; + f1[DIR_P0M] = (dist.f[DIR_P0M])[k1be ]; + f1[DIR_M0P] = (dist.f[DIR_M0P])[k1tw ]; + f1[DIR_0PP] = (dist.f[DIR_0PP])[k1tn ]; + f1[DIR_0MM] = (dist.f[DIR_0MM])[k1bs ]; + f1[DIR_0PM] = (dist.f[DIR_0PM])[k1bn ]; + f1[DIR_0MP] = (dist.f[DIR_0MP])[k1ts ]; + // f1[DIR_000] = (dist.f[DIR_000])[k1zero]; + f1[DIR_PPP] = (dist.f[DIR_PPP])[k1tne ]; + f1[DIR_MMP] = (dist.f[DIR_MMP])[k1tsw ]; + f1[DIR_PMP] = (dist.f[DIR_PMP])[k1tse ]; + f1[DIR_MPP] = (dist.f[DIR_MPP])[k1tnw ]; + f1[DIR_PPM] = (dist.f[DIR_PPM])[k1bne ]; + f1[DIR_MMM] = (dist.f[DIR_MMM])[k1bsw ]; + f1[DIR_PMM] = (dist.f[DIR_PMM])[k1bse ]; + f1[DIR_MPM] = (dist.f[DIR_MPM])[k1bnw ]; + ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + f[DIR_P00] = (dist.f[DIR_P00])[ke ]; + f[DIR_M00] = (dist.f[DIR_M00])[kw ]; + f[DIR_0P0] = 
(dist.f[DIR_0P0])[kn ]; + f[DIR_0M0] = (dist.f[DIR_0M0])[ks ]; + f[DIR_00P] = (dist.f[DIR_00P])[kt ]; + f[DIR_00M] = (dist.f[DIR_00M])[kb ]; + f[DIR_PP0] = (dist.f[DIR_PP0])[kne ]; + f[DIR_MM0] = (dist.f[DIR_MM0])[ksw ]; + f[DIR_PM0] = (dist.f[DIR_PM0])[kse ]; + f[DIR_MP0] = (dist.f[DIR_MP0])[knw ]; + f[DIR_P0P] = (dist.f[DIR_P0P])[kte ]; + f[DIR_M0M] = (dist.f[DIR_M0M])[kbw ]; + f[DIR_P0M] = (dist.f[DIR_P0M])[kbe ]; + f[DIR_M0P] = (dist.f[DIR_M0P])[ktw ]; + f[DIR_0PP] = (dist.f[DIR_0PP])[ktn ]; + f[DIR_0MM] = (dist.f[DIR_0MM])[kbs ]; + f[DIR_0PM] = (dist.f[DIR_0PM])[kbn ]; + f[DIR_0MP] = (dist.f[DIR_0MP])[kts ]; + f[DIR_000] = (dist.f[DIR_000])[kzero]; + f[DIR_PPP] = (dist.f[DIR_PPP])[ktne ]; + f[DIR_MMP] = (dist.f[DIR_MMP])[ktsw ]; + f[DIR_PMP] = (dist.f[DIR_PMP])[ktse ]; + f[DIR_MPP] = (dist.f[DIR_MPP])[ktnw ]; + f[DIR_PPM] = (dist.f[DIR_PPM])[kbne ]; + f[DIR_MMM] = (dist.f[DIR_MMM])[kbsw ]; + f[DIR_PMM] = (dist.f[DIR_PMM])[kbse ]; + f[DIR_MPM] = (dist.f[DIR_MPM])[kbnw ]; + ////////////////////////////////////////////////////////////////////////// + real drho = vf::lbm::getDensity(f); + + real rhoCorrection = densityCorrectionFactor*drho; + + real cs = c1o1 / sqrtf(c3o1); + getPointersToDistributions(dist, distributions, numberOfLBnodes, !isEvenTimestep); + switch(direction) + { + case MZZ: + (dist.f[DIR_P00])[ke ] = computeOutflowDistribution(f, f1, DIR_P00 , rhoCorrection, cs, c2o27); + (dist.f[DIR_PM0])[kse ] = computeOutflowDistribution(f, f1, DIR_PM0, rhoCorrection, cs, c1o54); + (dist.f[DIR_PP0])[kne ] = computeOutflowDistribution(f, f1, DIR_PP0, rhoCorrection, cs, c1o54); + (dist.f[DIR_P0M])[kbe ] = computeOutflowDistribution(f, f1, DIR_P0M, rhoCorrection, cs, c1o54); + (dist.f[DIR_P0P])[kte ] = computeOutflowDistribution(f, f1, DIR_P0P, rhoCorrection, cs, c1o54); + (dist.f[DIR_PMP])[ktse ] = computeOutflowDistribution(f, f1, DIR_PMP, rhoCorrection, cs, c1o216); + (dist.f[DIR_PPP])[ktne ] = computeOutflowDistribution(f, f1, DIR_PPP, rhoCorrection, cs, 
c1o216); + (dist.f[DIR_PMM])[kbse ] = computeOutflowDistribution(f, f1, DIR_PMM, rhoCorrection, cs, c1o216); + (dist.f[DIR_PPM])[kbne ] = computeOutflowDistribution(f, f1, DIR_PPM, rhoCorrection, cs, c1o216); + break; + + case PZZ: + (dist.f[DIR_M00])[kw ] = computeOutflowDistribution(f, f1, DIR_M00, rhoCorrection, cs, c2o27); + (dist.f[DIR_MM0])[ksw ] = computeOutflowDistribution(f, f1, DIR_MM0, rhoCorrection, cs, c1o54); + (dist.f[DIR_MP0])[knw ] = computeOutflowDistribution(f, f1, DIR_MP0, rhoCorrection, cs, c1o54); + (dist.f[DIR_M0M])[kbw ] = computeOutflowDistribution(f, f1, DIR_M0M, rhoCorrection, cs, c1o54); + (dist.f[DIR_M0P])[ktw ] = computeOutflowDistribution(f, f1, DIR_M0P, rhoCorrection, cs, c1o54); + (dist.f[DIR_MMP])[ktsw ] = computeOutflowDistribution(f, f1, DIR_MMP, rhoCorrection, cs, c1o216); + (dist.f[DIR_MPP])[ktnw ] = computeOutflowDistribution(f, f1, DIR_MPP, rhoCorrection, cs, c1o216); + (dist.f[DIR_MMM])[kbsw ] = computeOutflowDistribution(f, f1, DIR_MMM, rhoCorrection, cs, c1o216); + (dist.f[DIR_MPM])[kbnw ] = computeOutflowDistribution(f, f1, DIR_MPM, rhoCorrection, cs, c1o216); + break; + + case ZMZ: + (dist.f[DIR_0P0])[kn ] = computeOutflowDistribution(f, f1, DIR_0P0, rhoCorrection, cs, c2o27); + (dist.f[DIR_PP0])[kne ] = computeOutflowDistribution(f, f1, DIR_PP0, rhoCorrection, cs, c1o54); + (dist.f[DIR_MP0])[knw ] = computeOutflowDistribution(f, f1, DIR_MP0, rhoCorrection, cs, c1o54); + (dist.f[DIR_0PP])[ktn ] = computeOutflowDistribution(f, f1, DIR_0PP, rhoCorrection, cs, c1o54); + (dist.f[DIR_0PM])[kbn ] = computeOutflowDistribution(f, f1, DIR_0PM, rhoCorrection, cs, c1o54); + (dist.f[DIR_PPP])[ktne ] = computeOutflowDistribution(f, f1, DIR_PPP, rhoCorrection, cs, c1o216); + (dist.f[DIR_MPP])[ktnw ] = computeOutflowDistribution(f, f1, DIR_MPP, rhoCorrection, cs, c1o216); + (dist.f[DIR_PPM])[kbne ] = computeOutflowDistribution(f, f1, DIR_PPM, rhoCorrection, cs, c1o216); + (dist.f[DIR_MPM])[kbnw ] = computeOutflowDistribution(f, f1, 
DIR_MPM, rhoCorrection, cs, c1o216); + break; + + case ZPZ: + (dist.f[DIR_0M0])[ks ] =computeOutflowDistribution(f, f1, DIR_0M0, rhoCorrection, cs, c2o27); + (dist.f[DIR_PM0])[kse ] =computeOutflowDistribution(f, f1, DIR_PM0, rhoCorrection, cs, c1o54); + (dist.f[DIR_MM0])[ksw ] =computeOutflowDistribution(f, f1, DIR_MM0, rhoCorrection, cs, c1o54); + (dist.f[DIR_0MP])[kts ] =computeOutflowDistribution(f, f1, DIR_0MP, rhoCorrection, cs, c1o54); + (dist.f[DIR_0MM])[kbs ] =computeOutflowDistribution(f, f1, DIR_0MM, rhoCorrection, cs, c1o54); + (dist.f[DIR_PMP])[ktse ] =computeOutflowDistribution(f, f1, DIR_PMP, rhoCorrection, cs, c1o216); + (dist.f[DIR_MMP])[ktsw ] =computeOutflowDistribution(f, f1, DIR_MMP, rhoCorrection, cs, c1o216); + (dist.f[DIR_PMM])[kbse ] =computeOutflowDistribution(f, f1, DIR_PMM, rhoCorrection, cs, c1o216); + (dist.f[DIR_MMM])[kbsw ] =computeOutflowDistribution(f, f1, DIR_MMM, rhoCorrection, cs, c1o216); + break; + + case ZZM: + (dist.f[DIR_00P])[kt ] = computeOutflowDistribution(f, f1, DIR_00P, rhoCorrection, cs, c2o27); + (dist.f[DIR_P0P])[kte ] = computeOutflowDistribution(f, f1, DIR_P0P, rhoCorrection, cs, c1o54); + (dist.f[DIR_M0P])[ktw ] = computeOutflowDistribution(f, f1, DIR_M0P, rhoCorrection, cs, c1o54); + (dist.f[DIR_0PP])[ktn ] = computeOutflowDistribution(f, f1, DIR_0PP, rhoCorrection, cs, c1o54); + (dist.f[DIR_0MP])[kts ] = computeOutflowDistribution(f, f1, DIR_0MP, rhoCorrection, cs, c1o54); + (dist.f[DIR_PPP])[ktne ] = computeOutflowDistribution(f, f1, DIR_PPP, rhoCorrection, cs, c1o216); + (dist.f[DIR_MPP])[ktnw ] = computeOutflowDistribution(f, f1, DIR_MPP, rhoCorrection, cs, c1o216); + (dist.f[DIR_PMP])[ktse ] = computeOutflowDistribution(f, f1, DIR_PMP, rhoCorrection, cs, c1o216); + (dist.f[DIR_MMP])[ktsw ] = computeOutflowDistribution(f, f1, DIR_MMP, rhoCorrection, cs, c1o216); + break; + + case ZZP: + (dist.f[DIR_00M])[kb ] = computeOutflowDistribution(f, f1, DIR_00M, rhoCorrection, cs, c2o27); + (dist.f[DIR_P0M])[kbe ] = 
computeOutflowDistribution(f, f1, DIR_P0M, rhoCorrection, cs, c1o54); + (dist.f[DIR_M0M])[kbw ] = computeOutflowDistribution(f, f1, DIR_M0M, rhoCorrection, cs, c1o54); + (dist.f[DIR_0PM])[kbn ] = computeOutflowDistribution(f, f1, DIR_0PM, rhoCorrection, cs, c1o54); + (dist.f[DIR_0MM])[kbs ] = computeOutflowDistribution(f, f1, DIR_0MM, rhoCorrection, cs, c1o54); + (dist.f[DIR_PPM])[kbne ] = computeOutflowDistribution(f, f1, DIR_PPM, rhoCorrection, cs, c1o216); + (dist.f[DIR_MPM])[kbnw ] = computeOutflowDistribution(f, f1, DIR_MPM, rhoCorrection, cs, c1o216); + (dist.f[DIR_PMM])[kbse ] = computeOutflowDistribution(f, f1, DIR_PMM, rhoCorrection, cs, c1o216); + (dist.f[DIR_MMM])[kbsw ] = computeOutflowDistribution(f, f1, DIR_MMM, rhoCorrection, cs, c1o216); + break; + default: + break; + } +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/src/gpu/VirtualFluids_GPU/GPU/TurbulentViscosityKernels.cu b/src/gpu/VirtualFluids_GPU/GPU/TurbulentViscosityKernels.cu index df1dc571cf9627d84e940b2e0f53d55216ca6532..f4167af01eb30b458442057ada098f34998d1a98 100644 --- a/src/gpu/VirtualFluids_GPU/GPU/TurbulentViscosityKernels.cu +++ b/src/gpu/VirtualFluids_GPU/GPU/TurbulentViscosityKernels.cu @@ -38,6 +38,7 @@ #include <cuda_runtime.h> #include <helper_cuda.h> #include "LBM/LB.h" +#include "Kernel/Utilities/DistributionHelper.cuh" using namespace vf::lbm::constant; @@ -64,15 +65,7 @@ __global__ void calcAMD(real* vx, uint size_Mat, real SGSConstant) { - - const uint x = threadIdx.x; - const uint y = blockIdx.x; - const uint z = blockIdx.y; - - const uint nx = blockDim.x; - const uint ny = gridDim.x; - - const uint k = nx*(ny*z + y) + x; + const uint k = vf::gpu::getNodeIndex(); if(k >= size_Mat) return; if(typeOfGridNode[k] != GEO_FLUID) return; @@ -102,7 +95,7 @@ __global__ void calcAMD(real* vx, (dvxdx*dvzdx + dvxdy*dvzdy + dvxdz*dvzdz) * (dvxdz+dvzdx) + (dvydx*dvzdx 
+ dvydy*dvzdy + dvydz*dvzdz) * (dvydz+dvzdy); - turbulentViscosity[k] = max(c0o1,-SGSConstant*enumerator)/denominator; + turbulentViscosity[k] = denominator != c0o1 ? max(c0o1,-SGSConstant*enumerator)/denominator : c0o1; } void calcTurbulentViscosityAMD(Parameter* para, int level) diff --git a/src/gpu/VirtualFluids_GPU/LBM/LB.h b/src/gpu/VirtualFluids_GPU/LBM/LB.h index eea4adfda3c1ef0862f39ef58fc6e065af7bab1b..813b4ccb0d5ca9b0c5e24898dc8feb15691c7386 100644 --- a/src/gpu/VirtualFluids_GPU/LBM/LB.h +++ b/src/gpu/VirtualFluids_GPU/LBM/LB.h @@ -144,6 +144,7 @@ struct InitCondition bool hasWallModelMonitor {false}; bool simulatePorousMedia {false}; bool streetVelocityFile {false}; + real outflowPressureCorrectionFactor {0.0}; }; //Interface Cells diff --git a/src/gpu/VirtualFluids_GPU/Parameter/Parameter.cpp b/src/gpu/VirtualFluids_GPU/Parameter/Parameter.cpp index dc7d5cb07e573003bfebfa7ef327dddb1f9d4aa4..4123f39f351c4bf41d536bff0d1deea3fbe6e2aa 100644 --- a/src/gpu/VirtualFluids_GPU/Parameter/Parameter.cpp +++ b/src/gpu/VirtualFluids_GPU/Parameter/Parameter.cpp @@ -883,6 +883,10 @@ void Parameter::setPressOutZ(unsigned int PressOutZ) { ic.PressOutZ = PressOutZ; } +void Parameter::setOutflowPressureCorrectionFactor(real pressBCrhoCorrectionFactor) +{ + ic.outflowPressureCorrectionFactor = pressBCrhoCorrectionFactor; +} void Parameter::setMaxDev(int maxdev) { ic.maxdev = maxdev; @@ -1906,6 +1910,10 @@ unsigned int Parameter::getPressOutZ() { return ic.PressOutZ; } +real Parameter::getOutflowPressureCorrectionFactor() +{ + return ic.outflowPressureCorrectionFactor; +} int Parameter::getMaxDev() { return ic.maxdev; diff --git a/src/gpu/VirtualFluids_GPU/Parameter/Parameter.h b/src/gpu/VirtualFluids_GPU/Parameter/Parameter.h index 813e6007737bbf402c9d54d5247597275637d096..f2e3966cfc0babfdfdf1fb94a5515b7a0c1f40e1 100644 --- a/src/gpu/VirtualFluids_GPU/Parameter/Parameter.h +++ b/src/gpu/VirtualFluids_GPU/Parameter/Parameter.h @@ -234,6 +234,7 @@ struct 
LBMSimulationParameter { unsigned int kInletQread, kOutletQread; // DEPRECATED WallModelParameters wallModel; + real outflowPressureCorrectionFactor; // testRoundoffError Distributions27 kDistTestRE; @@ -467,6 +468,7 @@ public: void setpressBcPos(std::string pressBcPos); void setpressBcQs(std::string pressBcQs); void setpressBcValue(std::string pressBcValue); + void setOutflowPressureCorrectionFactor(real correctionFactor); void setpressBcValues(std::string pressBcValues); void setvelBcQs(std::string velBcQs); void setvelBcValues(std::string velBcValues); @@ -849,6 +851,7 @@ public: std::string getOutflowBoundaryNormalX(); std::string getOutflowBoundaryNormalY(); std::string getOutflowBoundaryNormalZ(); + real getOutflowPressureCorrectionFactor(); // CUDA random number curandState *getRandomState(); // Kernel