diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml old mode 100644 new mode 100755 index e171e2e7fbe1984588355f5a833a21160024da32..be6533afdfd82ee2377ddba11837b938ea7cd311 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -235,7 +235,7 @@ gcc_9_python_hpc_test: - hpc-rocket launch --watch Python/SlurmTests/poiseuille/rocket.yml ############################################################################### -multigpu_hpc_test: +regression_test_4gpu: image: python:latest stage: test @@ -252,12 +252,13 @@ multigpu_hpc_test: - pip install "fieldcompare[all]" script: - - hpc-rocket launch --watch regression-tests/multigpu_test/rocket.yml + - hpc-rocket launch --watch regression-tests/multigpu_test/rocket4GPU.yml - git clone --depth 1 --filter=blob:none --sparse https://github.com/irmb/test_data - cd test_data - - git sparse-checkout set regression_tests/gpu/DrivenCavity_4GPU_2Levels + - git sparse-checkout set regression_tests/gpu/DrivenCavity_4GPU_2Levels regression_tests/gpu/SphereScaling_4GPU_2Levels - cd .. - - fieldcompare dir output/results test_data/regression_tests/gpu/DrivenCavity_4GPU_2Levels --include-files "*.vtu" + - fieldcompare dir output/4GPU test_data/regression_tests/gpu/DrivenCavity_4GPU_2Levels --include-files "DrivenCavityMultiGPU*.vtu" + - fieldcompare dir output/4GPU test_data/regression_tests/gpu/SphereScaling_4GPU_2Levels --include-files "SphereScaling*.vtu" ############################################################################### ## Benchmark ## diff --git a/CMake/cmake_config_files/PHOENIX.config.cmake b/CMake/cmake_config_files/PHOENIX.config.cmake index 5ca4d9821d918f66745fc27363975811dc278440..d31d8684a53a769e48408ad5febe7d2c6b22c623 100644 --- a/CMake/cmake_config_files/PHOENIX.config.cmake +++ b/CMake/cmake_config_files/PHOENIX.config.cmake @@ -28,7 +28,7 @@ set(CMAKE_CUDA_ARCHITECTURES 60) # NVIDIA Tesla P100 set(GPU_APP "apps/gpu/LBM/") list(APPEND USER_APPS - "${GPU_APP}DrivenCavityMultiGPU" + # "${GPU_APP}DrivenCavityMultiGPU" # "${GPU_APP}SphereScaling" # "${GPU_APP}MusselOyster" ) diff --git a/apps/gpu/LBM/BoundaryLayer/BoundaryLayer.cpp b/apps/gpu/LBM/BoundaryLayer/BoundaryLayer.cpp index b4aba451a253135a19472145921d7e7461a89ded..4c91bf148d422838bdfe647b53e40c97166b9f4e 100644 --- a/apps/gpu/LBM/BoundaryLayer/BoundaryLayer.cpp +++ b/apps/gpu/LBM/BoundaryLayer/BoundaryLayer.cpp @@ -95,18 +95,12 @@ #include "utilities/communication.h" //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - std::string path("."); std::string simulationName("BoundaryLayer"); using namespace vf::lbm::constant; //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - void multipleLevel(const std::string& configPath) { @@ -284,7 +278,7 @@ void multipleLevel(const std::string& configPath) 
gridBuilder->addCoarseGrid( xGridMin, 0.0, 0.0, xGridMax, L_y, L_z, dx); - if(true)// Add refinement + if(false)// Add refinement { gridBuilder->setNumberOfLayers(4,0); real xMaxRefinement = readPrecursor? xGridMax-H: xGridMax; //Stop refinement some distance before outlet if domain ist not periodic @@ -350,13 +344,14 @@ void multipleLevel(const std::string& configPath) gridBuilder->setStressBoundaryCondition(SideType::MZ, 0.0, 0.0, 1.0, // wall normals - samplingOffset, z0, dx); // wall model settinng + samplingOffset, z0, dx); // wall model settinng + para->setHasWallModelMonitor(true); gridBuilder->setSlipBoundaryCondition(SideType::PZ, 0.0f, 0.0f, -1.0f); bcFactory.setVelocityBoundaryCondition(BoundaryConditionFactory::VelocityBC::VelocityCompressible); bcFactory.setStressBoundaryCondition(BoundaryConditionFactory::StressBC::StressPressureBounceBack); - bcFactory.setSlipBoundaryCondition(BoundaryConditionFactory::SlipBC::SlipBounceBack); + bcFactory.setSlipBoundaryCondition(BoundaryConditionFactory::SlipBC::SlipCompressibleTurbulentViscosity); bcFactory.setPressureBoundaryCondition(BoundaryConditionFactory::PressureBC::OutflowNonReflective); bcFactory.setPrecursorBoundaryCondition(useDistributions ? BoundaryConditionFactory::PrecursorBC::DistributionsPrecursor : BoundaryConditionFactory::PrecursorBC::VelocityPrecursor); para->setOutflowPressureCorrectionFactor(0.0); diff --git a/apps/gpu/LBM/DrivenCavityMultiGPU/DrivenCavityMultiGPU.cpp b/apps/gpu/LBM/DrivenCavityMultiGPU/DrivenCavityMultiGPU.cpp old mode 100644 new mode 100755 index 97f2da1f0e6a0e9bc1250382808fb6bf9ea8462a..46e40044d7b3dca0cfc842ef775f680f0922e9b8 --- a/apps/gpu/LBM/DrivenCavityMultiGPU/DrivenCavityMultiGPU.cpp +++ b/apps/gpu/LBM/DrivenCavityMultiGPU/DrivenCavityMultiGPU.cpp @@ -93,7 +93,7 @@ void multipleLevel(std::filesystem::path& configPath) //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - const std::string outPath("output/"); + const std::string outPath("output/" + std::to_string(para->getNumprocs()) + "GPU/"); const std::string gridPath = "output/"; std::string simulationName("DrivenCavityMultiGPU"); diff --git a/apps/gpu/LBM/DrivenCavityMultiGPU/configPhoenix4GPU.txt b/apps/gpu/LBM/DrivenCavityMultiGPU/configPhoenix4GPU_regressionTest.txt similarity index 100% rename from apps/gpu/LBM/DrivenCavityMultiGPU/configPhoenix4GPU.txt rename to apps/gpu/LBM/DrivenCavityMultiGPU/configPhoenix4GPU_regressionTest.txt diff --git a/apps/gpu/LBM/SphereScaling/SphereScaling.cpp b/apps/gpu/LBM/SphereScaling/SphereScaling.cpp old mode 100644 new mode 100755 index f33b5e75db60ea88638c2b2cdc11bdb4999088f5..43103a2f02522f7b3898027f9b807fdb84dd9d5c --- a/apps/gpu/LBM/SphereScaling/SphereScaling.cpp +++ b/apps/gpu/LBM/SphereScaling/SphereScaling.cpp @@ -62,28 +62,6 @@ #include "utilities/communication.h" -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -// -// U s e r s e t t i n g s -// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// 
-//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - -// Phoenix -// const std::string outPath("/work/y0078217/Results/SphereScalingResults/"); -// const std::string gridPathParent = "/work/y0078217/Grids/GridSphereScaling/"; -// const std::string simulationName("SphereScaling"); -// const std::string stlPath("/home/y0078217/STL/Sphere/"); - -// Relative Paths -const std::string outPath("./output/SphereScalingResults/"); -const std::string gridPathParent = "./output/grids/SphereScalingResults/"; -const std::string simulationName("SphereScaling"); -const std::string stlPath("./stl/SphereScaling/"); - //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// @@ -119,10 +97,13 @@ void multipleLevel(std::filesystem::path& configPath) //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// bool useGridGenerator = true; - bool useLevels = true; + bool useLevels = true; std::string scalingType = "strong"; // "strong" // "weak" - // para->setUseStreams(true); // set in config - // para->useReducedCommunicationAfterFtoC = true; // set in config + + const std::string outPath("output/" + std::to_string(para->getNumprocs()) + "GPU/"); + const std::string simulationName("SphereScaling"); + const std::string gridPath = "./output/grids/"; + const std::string stlPath("./stl/SphereScaling/"); if (para->getNumprocs() == 1) { para->useReducedCommunicationAfterFtoC = false; @@ -130,10 +111,9 @@ void multipleLevel(std::filesystem::path& configPath) if (scalingType != "weak" && scalingType != "strong") std::cerr << "unknown scaling type" << std::endl; //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - std::string gridPath(gridPathParent); // only for GridGenerator, for GridReader the gridPath needs to be set in the config file real dxGrid = (real)1.0; - real vxLB = (real)0.0005; // LB units + real vxLB = (real)0.005; // LB units real viscosityLB = 0.001; //(vxLB * dxGrid) / Re; para->setVelocityLB(vxLB); @@ -142,14 +122,9 @@ void multipleLevel(std::filesystem::path& configPath) para->setViscosityRatio((real)0.058823529); para->setDensityRatio((real)998.0); - - // para->setTimestepOut(10); - // para->setTimestepEnd(10); - para->setCalcDragLift(false); para->setUseWale(false); - para->setOutputPrefix(simulationName); if (para->getOutputPath() == "output/") {para->setOutputPath(outPath);} para->setPrintFiles(true); @@ -159,12 +134,8 @@ void multipleLevel(std::filesystem::path& configPath) else para->setMaxLevel(1); - // para->setMainKernel("CumulantK17CompChim"); - para->setMainKernel("CumulantK17CompChimStream"); - //para->setMainKernel("CumulantK17CompChimRedesigned"); - scalingFactory.setScalingFactory(GridScalingFactory::GridScaling::ScaleRhoSq); - - + para->setMainKernel("CumulantK17"); + 
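// Illustrative note on the output-path convention introduced above (values shown are for the
// 4-GPU regression run, not additional code from the commit): with para->getNumprocs() == 4 the
// expression
//
//     "output/" + std::to_string(para->getNumprocs()) + "GPU/"
//
// evaluates to "output/4GPU/". That is the directory rocket4GPU.yml collects
// (multigpu_test/output/4GPU/) and the one the regression_test_4gpu CI job passes to fieldcompare.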
scalingFactory.setScalingFactory(GridScalingFactory::GridScaling::ScaleCompressible); //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/apps/gpu/LBM/SphereScaling/configPhoenix4GPU_regressionTest.txt b/apps/gpu/LBM/SphereScaling/configPhoenix4GPU_regressionTest.txt new file mode 100644 index 0000000000000000000000000000000000000000..c5789cdf96049b7c0a31ce693c29cd2db4952a58 --- /dev/null +++ b/apps/gpu/LBM/SphereScaling/configPhoenix4GPU_regressionTest.txt @@ -0,0 +1,17 @@ +################################################## +#GPU Mapping +################################################## +Devices="0 1 2 3" +NumberOfDevices=4 + +################################################## +#simulation parameter +################################################## +TimeEnd=10000 +TimeOut=10000 + +################################################## +# CUDA Streams and optimized communication (only used for multiple GPUs) +################################################## +useStreams = true +useReducedCommunicationInInterpolation = true \ No newline at end of file diff --git a/pythonbindings/pyfluids-stubs/bindings/gpu/grid_generator.pyi b/pythonbindings/pyfluids-stubs/bindings/gpu/grid_generator.pyi index 8d715e4b4cd49e6dbf92da3aedddbc4b869067c4..514dc5053e9574b452d80f61eb3d4e1ebb0f4396 100644 --- a/pythonbindings/pyfluids-stubs/bindings/gpu/grid_generator.pyi +++ b/pythonbindings/pyfluids-stubs/bindings/gpu/grid_generator.pyi @@ -67,7 +67,7 @@ class LevelGridBuilder(GridBuilder): def set_precursor_boundary_condition(self, side_type: pyfluids.bindings.gpu.SideType, file_collection: pyfluids.bindings.gpu.VelocityFileCollection, n_t_read: int, velocity_x: float = ..., velocity_y: float = ..., velocity_z: float = ..., file_level_to_grid_level_map: List[int] = ...) -> None: ... def set_pressure_boundary_condition(self, side_type: pyfluids.bindings.gpu.SideType, rho: float) -> None: ... def set_slip_boundary_condition(self, side_type: pyfluids.bindings.gpu.SideType, normal_x: float, normal_y: float, normal_z: float) -> None: ... - def set_stress_boundary_condition(self, side_type: pyfluids.bindings.gpu.SideType, normal_x: float, normal_y: float, normal_z: float, sampling_offset: int, z0: float, dx: float) -> None: ... + def set_stress_boundary_condition(self, side_type: pyfluids.bindings.gpu.SideType, normal_x: float, normal_y: float, normal_z: float, sampling_offset: int, z0: float, dx: float, q: float) -> None: ... def set_velocity_boundary_condition(self, side_type: pyfluids.bindings.gpu.SideType, vx: float, vy: float, vz: float) -> None: ... 
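# Illustrative usage of the extended stress-BC stub above (the SideType member and all numeric
# values are placeholders, not taken from the source; only the parameter names come from the stub):
#
#     grid_builder.set_stress_boundary_condition(
#         side_type=SideType.MZ,
#         normal_x=0.0, normal_y=0.0, normal_z=1.0,
#         sampling_offset=2, z0=0.1, dx=1.0,
#         q=0.5)  # new trailing q argument added by this change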
class MultipleGridBuilder(LevelGridBuilder): diff --git a/regression-tests/multigpu_test/rocket.yml b/regression-tests/multigpu_test/rocket4GPU.yml similarity index 72% rename from regression-tests/multigpu_test/rocket.yml rename to regression-tests/multigpu_test/rocket4GPU.yml index f621b1349c042e02f2e834e697147da0822ffe1f..a05ffea6ad04e0d5cfb8d7749111726dfceb4609 100755 --- a/regression-tests/multigpu_test/rocket.yml +++ b/regression-tests/multigpu_test/rocket4GPU.yml @@ -3,8 +3,8 @@ user: $PHOENIX_REMOTE_USER private_keyfile: $PHOENIX_PRIVATE_KEY copy: - - from: regression-tests/multigpu_test/slurm.job - to: multigpu_test/slurm.job + - from: regression-tests/multigpu_test/slurm4GPU.job + to: multigpu_test/slurm4GPU.job overwrite: true - from: "CMake/" @@ -36,13 +36,16 @@ copy: overwrite: true collect: - - from: multigpu_test/output/ - to: output/results/ + - from: multigpu_test/output/4GPU/ + to: output/4GPU overwrite: true - - from: multigpu_test/slurmMultiGPU.out - to: output/slurmMultiGPU.out + - from: multigpu_test/slurm4GPU.out + to: output/4GPU/slurm4GPU.out overwrite: true -sbatch: multigpu_test/slurm.job +clean: + - multigpu_test/output/* + +sbatch: multigpu_test/slurm4GPU.job continue_if_job_fails: true diff --git a/regression-tests/multigpu_test/slurm.job b/regression-tests/multigpu_test/slurm4GPU.job similarity index 61% rename from regression-tests/multigpu_test/slurm.job rename to regression-tests/multigpu_test/slurm4GPU.job index 0ee0df46ab64bab6520f9f46fc939d5b3186fae7..886bfaf7479e01cfef285e9a2dae189258ce0b7e 100755 --- a/regression-tests/multigpu_test/slurm.job +++ b/regression-tests/multigpu_test/slurm4GPU.job @@ -3,10 +3,10 @@ #SBATCH --partition=gpu01_queue #SBATCH --nodes=1 #SBATCH --time=10:00:00 -#SBATCH --job-name=Cavity4GPU +#SBATCH --job-name=Regr4GPU #SBATCH --ntasks-per-node=4 #SBATCH --gres=gpu:4 -#SBATCH --output=multigpu_test/slurmMultiGPU.out +#SBATCH --output=multigpu_test/slurm4GPU.out ##SBATCH --exclusive module purge @@ -21,9 +21,13 @@ module list cd multigpu_test mkdir -p build cd build -cmake .. -DBUILD_VF_GPU=ON -DCMAKE_CUDA_ARCHITECTURES=60 -DUSER_APPS="apps/gpu/LBM/DrivenCavityMultiGPU" +cmake .. -DBUILD_VF_GPU=ON -DCMAKE_CUDA_ARCHITECTURES=60 -DUSER_APPS=apps/gpu/LBM/DrivenCavityMultiGPU\;apps/gpu/LBM/SphereScaling make -j 16 cd .. mkdir -p output -mpirun -np 4 "./build/bin/DrivenCavityMultiGPU" "configPhoenix4GPU.txt" \ No newline at end of file +echo $'\n\n\n\n---First test: DrivenCavityMultiGPU on 4 GPUs\n\n' +mpirun -np 4 "./build/bin/DrivenCavityMultiGPU" "configPhoenix4GPU_regressionTest.txt" + +echo $'\n\n\n\n---Second test: SphereScaling on 4 GPUs\n\n' +mpirun -np 4 "./build/bin/SphereScaling" "configPhoenix4GPU_regressionTest.txt" \ No newline at end of file diff --git a/src/gpu/GridGenerator/TransientBCSetter/TransientBCSetter.cpp b/src/gpu/GridGenerator/TransientBCSetter/TransientBCSetter.cpp index 5f3c4ad492b16c09b26acd00a624a54ad65dffda..571796d503a1a73b3eccf631a347884c7522b533 100644 --- a/src/gpu/GridGenerator/TransientBCSetter/TransientBCSetter.cpp +++ b/src/gpu/GridGenerator/TransientBCSetter/TransientBCSetter.cpp @@ -417,7 +417,7 @@ void VTKReader::getNextData(real* data, uint numberOfNodes, real time) { numberOfFiles++; - printf("switching to precursor file no. %zu\n", numberOfFiles); + VF_LOG_INFO("PrecursorBC on level {}: switching to file no. 
{}\n", level, numberOfFiles); if(numberOfFiles == this->fileCollection->files[level][id].size()) throw std::runtime_error("Not enough Precursor Files to read"); diff --git a/src/gpu/GridGenerator/grid/BoundaryConditions/Side.cpp b/src/gpu/GridGenerator/grid/BoundaryConditions/Side.cpp index ba4eea50ffb6bc136528db31207274d626fe9b15..718a8d5da1de148c72ba67dd2d15c5e3b443e16a 100644 --- a/src/gpu/GridGenerator/grid/BoundaryConditions/Side.cpp +++ b/src/gpu/GridGenerator/grid/BoundaryConditions/Side.cpp @@ -75,14 +75,13 @@ void Side::addIndices(SPtr<Grid> grid, SPtr<BoundaryCondition> boundaryCondition || grid->getFieldEntry(index) == vf::gpu::FLUID_FCF // Overlap of BCs on edge nodes || grid->nodeHasBC(index) ) - { + { grid->setFieldEntry(index, boundaryCondition->getType()); boundaryCondition->indices.push_back(index); setPressureNeighborIndices(boundaryCondition, grid, index); setStressSamplingIndices(boundaryCondition, grid, index); - + // if(grid->getFieldEntry(index)==26) printf("index = %u, v1 = %f, v2 = %f, field entry=%u \n", index, v1, v2, grid->getFieldEntry(index) ); setQs(grid, boundaryCondition, index); - boundaryCondition->patches.push_back(0); } } diff --git a/src/gpu/GridGenerator/grid/GridImp.cpp b/src/gpu/GridGenerator/grid/GridImp.cpp index 32cf9d07da87149695a5bf548ed357be2b2f71b4..9fc3c099e9f382c8d22fa64626c89219ef29c360 100644 --- a/src/gpu/GridGenerator/grid/GridImp.cpp +++ b/src/gpu/GridGenerator/grid/GridImp.cpp @@ -2115,16 +2115,22 @@ void GridImp::sortFluidNodeIndicesMacroVars() if(this->fluidNodeIndicesAllFeatures.size()>0) { this->fluidNodeIndicesMacroVars.erase( std::remove_if( this->fluidNodeIndicesMacroVars.begin(), this->fluidNodeIndicesMacroVars.end(), - [&](auto x){return binary_search(fluidNodeIndicesAllFeatures.begin(),fluidNodeIndicesAllFeatures.end(),x);} ), - this->fluidNodeIndicesMacroVars.end() - ); + [&](auto x){return binary_search(fluidNodeIndicesAllFeatures.begin(),fluidNodeIndicesAllFeatures.end(),x);} ), + this->fluidNodeIndicesMacroVars.end() ); + } + + // Remove all indices in fluidNodeIndicesBorder from fluidNodeIndicesApplyBodyForce + if(this->fluidNodeIndicesBorder.size()>0) + { + this->fluidNodeIndicesMacroVars.erase( std::remove_if( this->fluidNodeIndicesMacroVars.begin(), this->fluidNodeIndicesMacroVars.end(), + [&](auto x){return binary_search(fluidNodeIndicesBorder.begin(),fluidNodeIndicesBorder.end(),x);} ), + this->fluidNodeIndicesMacroVars.end() ); } // Remove indices of fluidNodeIndicesMacroVars from fluidNodeIndices this->fluidNodeIndices.erase( std::remove_if( this->fluidNodeIndices.begin(), this->fluidNodeIndices.end(), [&](auto x){return binary_search(fluidNodeIndicesMacroVars.begin(),fluidNodeIndicesMacroVars.end(),x);} ), - this->fluidNodeIndices.end() - ); + this->fluidNodeIndices.end() ); } } @@ -2136,20 +2142,26 @@ void GridImp::sortFluidNodeIndicesApplyBodyForce() // Remove duplicates this->fluidNodeIndicesApplyBodyForce.erase( unique( this->fluidNodeIndicesApplyBodyForce.begin(), this->fluidNodeIndicesApplyBodyForce.end() ), this->fluidNodeIndicesApplyBodyForce.end() ); - // Remove indices of fluidNodeIndicesAllFeatures from fluidNodeIndicesMacroVars + // Remove indices of fluidNodeIndicesAllFeatures from fluidNodeIndicesApplyBodyForce if(this->fluidNodeIndicesAllFeatures.size()>0) { - this->fluidNodeIndicesApplyBodyForce.erase( std::remove_if( this->fluidNodeIndicesApplyBodyForce.begin(), this->fluidNodeIndicesApplyBodyForce.end(), + this->fluidNodeIndicesApplyBodyForce.erase( std::remove_if( 
this->fluidNodeIndicesApplyBodyForce.begin(), this->fluidNodeIndicesApplyBodyForce.end(), [&](auto x){return binary_search(fluidNodeIndicesAllFeatures.begin(),fluidNodeIndicesAllFeatures.end(),x);} ), - this->fluidNodeIndicesApplyBodyForce.end() - ); + this->fluidNodeIndicesApplyBodyForce.end() ); + } + + // Remove all indices in fluidNodeIndicesBorder from fluidNodeIndicesApplyBodyForce + if(this->fluidNodeIndicesBorder.size()>0) + { + this->fluidNodeIndicesApplyBodyForce.erase( std::remove_if( this->fluidNodeIndicesApplyBodyForce.begin(), this->fluidNodeIndicesApplyBodyForce.end(), + [&](auto x){return binary_search(fluidNodeIndicesBorder.begin(),fluidNodeIndicesBorder.end(),x);} ), + this->fluidNodeIndicesApplyBodyForce.end() ); } // Remove indices of fluidNodeIndicesMacroVars from fluidNodeIndices this->fluidNodeIndices.erase( std::remove_if( this->fluidNodeIndices.begin(), this->fluidNodeIndices.end(), - [&](auto x){return binary_search(fluidNodeIndicesApplyBodyForce.begin(),fluidNodeIndicesApplyBodyForce.end(),x);} ), - this->fluidNodeIndices.end() - ); + [&](auto x){return binary_search(fluidNodeIndicesApplyBodyForce.begin(),fluidNodeIndicesApplyBodyForce.end(),x);} ), + this->fluidNodeIndices.end() ); } } @@ -2160,11 +2172,19 @@ void GridImp::sortFluidNodeIndicesAllFeatures() sort(this->fluidNodeIndicesAllFeatures.begin(), this->fluidNodeIndicesAllFeatures.end()); // Remove duplicates this->fluidNodeIndicesAllFeatures.erase( unique( this->fluidNodeIndicesAllFeatures.begin(), this->fluidNodeIndicesAllFeatures.end() ), this->fluidNodeIndicesAllFeatures.end() ); - // Remove indices of fluidNodeIndicesMacroVars from fluidNodeIndices + + // Remove all indices in fluidNodeIndicesBorder from fluidNodeIndicesAllFeatures + if(this->fluidNodeIndicesBorder.size()>0) + { + this->fluidNodeIndicesAllFeatures.erase( std::remove_if( this->fluidNodeIndicesAllFeatures.begin(), this->fluidNodeIndicesAllFeatures.end(), + [&](auto x){return binary_search(fluidNodeIndicesBorder.begin(),fluidNodeIndicesBorder.end(),x);} ), + this->fluidNodeIndicesAllFeatures.end() ); + } + + // Remove indices of fluidNodeIndicesAllFeatures from fluidNodeIndices this->fluidNodeIndices.erase( std::remove_if( this->fluidNodeIndices.begin(), this->fluidNodeIndices.end(), [&](auto x){return binary_search(fluidNodeIndicesAllFeatures.begin(),fluidNodeIndicesAllFeatures.end(),x);} ), - this->fluidNodeIndices.end() - ); + this->fluidNodeIndices.end() ); } } diff --git a/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderGenerator/GridGenerator.cpp b/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderGenerator/GridGenerator.cpp index 38a7eef7e356e2f2da4c1a819d8375035a37313a..c2f86721de26d516ed60f497a65d1d46a34aa182 100644 --- a/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderGenerator/GridGenerator.cpp +++ b/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderGenerator/GridGenerator.cpp @@ -1002,7 +1002,7 @@ void GridGenerator::allocArrays_BoundaryQs() unsigned int sizeQ = para->getParH(i)->stressBC.numberOfBCnodes; QforBoundaryConditions &Q = para->getParH(i)->stressBC; getPointersToBoundaryConditions(Q, QQ, sizeQ); - + builder->getStressQs(Q.q27, i); //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// cudaMemoryManager->cudaCopyStressBC(i); diff --git a/src/gpu/VirtualFluids_GPU/Factories/GridScalingFactory.cpp b/src/gpu/VirtualFluids_GPU/Factories/GridScalingFactory.cpp index 
00a4c79574ce9d8ae372bfe9f7e546c05175bb10..49a6887ef2e462aba190023d334caa0012e2254e 100644 --- a/src/gpu/VirtualFluids_GPU/Factories/GridScalingFactory.cpp +++ b/src/gpu/VirtualFluids_GPU/Factories/GridScalingFactory.cpp @@ -6,7 +6,7 @@ void GridScalingFactory::setScalingFactory(const GridScalingFactory::GridScaling this->gridScaling = gridScalingType; } -gridScalingFC GridScalingFactory::getGridScalingFC() const +gridScalingFC GridScalingFactory::getGridScalingFC(bool hasTurbulentViscosity) const { // for descriptions of the scaling types refer to the header switch (gridScaling) { @@ -14,14 +14,15 @@ gridScalingFC GridScalingFactory::getGridScalingFC() const return ScaleFC_RhoSq_comp_27; break; case GridScaling::ScaleCompressible: - return ScaleFC_compressible; + if(hasTurbulentViscosity) return ScaleFC_compressible<true>; + else return ScaleFC_compressible<false>; break; default: return nullptr; } } -gridScalingCF GridScalingFactory::getGridScalingCF() const +gridScalingCF GridScalingFactory::getGridScalingCF(bool hasTurbulentViscosity) const { // for descriptions of the scaling types refer to the header switch (gridScaling) { @@ -29,8 +30,11 @@ gridScalingCF GridScalingFactory::getGridScalingCF() const return ScaleCF_RhoSq_comp_27; break; case GridScaling::ScaleCompressible: - return ScaleCF_compressible; - break; + { + if(hasTurbulentViscosity) return ScaleCF_compressible<true>; + else return ScaleCF_compressible<false>; + break; + } default: return nullptr; } diff --git a/src/gpu/VirtualFluids_GPU/Factories/GridScalingFactory.h b/src/gpu/VirtualFluids_GPU/Factories/GridScalingFactory.h index 7d7c20c63a01e2dba6a5578c6520c0ab06894b3c..d760240c2c5ed429799cd89e57704464515a92f5 100644 --- a/src/gpu/VirtualFluids_GPU/Factories/GridScalingFactory.h +++ b/src/gpu/VirtualFluids_GPU/Factories/GridScalingFactory.h @@ -59,8 +59,8 @@ public: void setScalingFactory(const GridScalingFactory::GridScaling gridScalingType); - [[nodiscard]] gridScalingFC getGridScalingFC() const; - [[nodiscard]] gridScalingCF getGridScalingCF() const; + [[nodiscard]] gridScalingFC getGridScalingFC(bool hasTurbulentViscosity) const; + [[nodiscard]] gridScalingCF getGridScalingCF(bool hasTurbulentViscosity) const; private: GridScaling gridScaling = GridScaling::NotSpecified; diff --git a/src/gpu/VirtualFluids_GPU/GPU/GPU_Interface.h b/src/gpu/VirtualFluids_GPU/GPU/GPU_Interface.h index ae8cbb77ec2493126d64b90a7119cbfa3efee666..4a5b7816c1b6591e4193639bcdf71242e77688c0 100644 --- a/src/gpu/VirtualFluids_GPU/GPU/GPU_Interface.h +++ b/src/gpu/VirtualFluids_GPU/GPU/GPU_Interface.h @@ -1624,7 +1624,8 @@ void ScaleCF_staggered_time_comp_27( real* DC, OffCF offCF); void ScaleCF_RhoSq_comp_27(LBMSimulationParameter * parameterDeviceC, LBMSimulationParameter* parameterDeviceF, ICellCF * icellCF, OffCF &offsetCF, CUstream_st *stream); -void ScaleCF_compressible(LBMSimulationParameter * parameterDeviceC, LBMSimulationParameter* parameterDeviceF, ICellCF * icellCF, OffCF &offsetCF, CUstream_st *stream); + +template<bool hasTurbulentViscosity> void ScaleCF_compressible(LBMSimulationParameter * parameterDeviceC, LBMSimulationParameter* parameterDeviceF, ICellCF * icellCF, OffCF &offsetCF, CUstream_st *stream); void ScaleCF_RhoSq_3rdMom_comp_27( real* DC, real* DF, @@ -1849,7 +1850,8 @@ void ScaleFC_staggered_time_comp_27( real* DC, OffFC offFC); void ScaleFC_RhoSq_comp_27(LBMSimulationParameter * parameterDeviceC, LBMSimulationParameter* parameterDeviceF, ICellFC * icellFC, OffFC& offsetFC, CUstream_st *stream); -void 
ScaleFC_compressible(LBMSimulationParameter * parameterDeviceC, LBMSimulationParameter* parameterDeviceF, ICellFC * icellFC, OffFC& offsetFC, CUstream_st *stream); + +template<bool hasTurbulentViscosity> void ScaleFC_compressible(LBMSimulationParameter * parameterDeviceC, LBMSimulationParameter* parameterDeviceF, ICellFC * icellFC, OffFC& offsetFC, CUstream_st *stream); void ScaleFC_RhoSq_3rdMom_comp_27( real* DC, real* DF, diff --git a/src/gpu/VirtualFluids_GPU/GPU/GPU_Kernels.cuh b/src/gpu/VirtualFluids_GPU/GPU/GPU_Kernels.cuh index 3134db44346ee7f465a5c8f04505ee5749482fbf..0c3c7fcefc2bbb7bc87d7d95863c8c74f14735a3 100644 --- a/src/gpu/VirtualFluids_GPU/GPU/GPU_Kernels.cuh +++ b/src/gpu/VirtualFluids_GPU/GPU/GPU_Kernels.cuh @@ -1860,7 +1860,7 @@ __global__ void scaleCF_RhoSq_comp_27( real* DC, unsigned int nyF, OffCF offCF); -__global__ void scaleCF_compressible( +template<bool hasTurbulentViscosity> __global__ void scaleCF_compressible( real* distributionsCoarse, real* distributionsFine, unsigned int* neighborXcoarse, @@ -1877,6 +1877,8 @@ __global__ void scaleCF_compressible( unsigned int numberOfInterfaceNodes, real omegaCoarse, real omegaFine, + real* turbulentViscosityCoarse, + real* turbulentViscosityFine, OffCF offsetCF); __global__ void scaleCF_RhoSq_3rdMom_comp_27(real* DC, @@ -2263,7 +2265,7 @@ __global__ void scaleFC_RhoSq_comp_27( real* DC, unsigned int nyF, OffFC offFC); -__global__ void scaleFC_compressible( +template<bool hasTurbulentViscosity> __global__ void scaleFC_compressible( real *distributionsCoarse, real *distributionsFine, unsigned int *neighborXcoarse, @@ -2280,6 +2282,8 @@ __global__ void scaleFC_compressible( unsigned int numberOfInterfaceNodes, real omegaCoarse, real omegaFine, + real* turbulentViscosityCoarse, + real* turbulentViscosityFine, OffFC offsetFC); __global__ void scaleFC_RhoSq_3rdMom_comp_27(real* DC, diff --git a/src/gpu/VirtualFluids_GPU/GPU/GridScaling/scaleCF_compressible.cu b/src/gpu/VirtualFluids_GPU/GPU/GridScaling/scaleCF_compressible.cu index 0724002cffa3a47820664851ffefd1c35dbe0235..84529ef2694b57448291957f7792360088bd954f 100644 --- a/src/gpu/VirtualFluids_GPU/GPU/GridScaling/scaleCF_compressible.cu +++ b/src/gpu/VirtualFluids_GPU/GPU/GridScaling/scaleCF_compressible.cu @@ -218,7 +218,7 @@ __device__ __inline__ void interpolateDistributions( //! 
// based on scaleCF_RhoSq_comp_27 -__global__ void scaleCF_compressible( +template<bool hasTurbulentViscosity> __global__ void scaleCF_compressible( real* distributionsCoarse, real* distributionsFine, unsigned int* neighborXcoarse, @@ -235,6 +235,8 @@ __global__ void scaleCF_compressible( unsigned int numberOfInterfaceNodes, real omegaCoarse, real omegaFine, + real* turbulentViscosityCoarse, + real* turbulentViscosityFine, OffCF offsetCF) { //////////////////////////////////////////////////////////////////////////////// @@ -310,6 +312,8 @@ __global__ void scaleCF_compressible( unsigned int k_0MM = k_base_0MM; unsigned int k_MMM = k_base_MMM; + if(hasTurbulentViscosity) omegaC = omegaCoarse / (c1o1 + c3o1*omegaCoarse*turbulentViscosityCoarse[k_000]); + calculateMomentsOnSourceNodes( distCoarse, omegaC, k_000, k_M00, k_0M0, k_00M, k_MM0, k_M0M, k_0MM, k_MMM, drho_MMM, vx1_MMM, vx2_MMM, vx3_MMM, kxyFromfcNEQ_MMM, kyzFromfcNEQ_MMM, kxzFromfcNEQ_MMM, kxxMyyFromfcNEQ_MMM, kxxMzzFromfcNEQ_MMM); @@ -327,6 +331,8 @@ __global__ void scaleCF_compressible( k_0MM = neighborZcoarse[k_0MM]; k_MMM = neighborZcoarse[k_MMM]; + if(hasTurbulentViscosity) omegaC = omegaCoarse / (c1o1 + c3o1*omegaCoarse*turbulentViscosityCoarse[k_000]); + calculateMomentsOnSourceNodes( distCoarse, omegaC, k_000, k_M00, k_0M0, k_00M, k_MM0, k_M0M, k_0MM, k_MMM, drho_MMP, vx1_MMP, vx2_MMP, vx3_MMP, kxyFromfcNEQ_MMP, kyzFromfcNEQ_MMP, kxzFromfcNEQ_MMP, kxxMyyFromfcNEQ_MMP, kxxMzzFromfcNEQ_MMP); @@ -344,6 +350,8 @@ __global__ void scaleCF_compressible( k_0MM = k_MMM; k_MMM = neighborXcoarse[k_MMM]; + if(hasTurbulentViscosity) omegaC = omegaCoarse / (c1o1 + c3o1*omegaCoarse*turbulentViscosityCoarse[k_000]); + calculateMomentsOnSourceNodes( distCoarse, omegaC, k_000, k_M00, k_0M0, k_00M, k_MM0, k_M0M, k_0MM, k_MMM, drho_PMP, vx1_PMP, vx2_PMP, vx3_PMP, kxyFromfcNEQ_PMP, kyzFromfcNEQ_PMP, kxzFromfcNEQ_PMP, kxxMyyFromfcNEQ_PMP, kxxMzzFromfcNEQ_PMP); @@ -361,6 +369,8 @@ __global__ void scaleCF_compressible( k_0M0 = k_base_MM0; k_MM0 = neighborXcoarse[k_base_MM0]; + if(hasTurbulentViscosity) omegaC = omegaCoarse / (c1o1 + c3o1*omegaCoarse*turbulentViscosityCoarse[k_000]); + calculateMomentsOnSourceNodes( distCoarse, omegaC, k_000, k_M00, k_0M0, k_00M, k_MM0, k_M0M, k_0MM, k_MMM, drho_PMM, vx1_PMM, vx2_PMM, vx3_PMM, kxyFromfcNEQ_PMM, kyzFromfcNEQ_PMM, kxzFromfcNEQ_PMM, kxxMyyFromfcNEQ_PMM, kxxMzzFromfcNEQ_PMM); @@ -388,6 +398,8 @@ __global__ void scaleCF_compressible( k_0MM = k_base_0MM; k_MMM = k_base_MMM; + if(hasTurbulentViscosity) omegaC = omegaCoarse / (c1o1 + c3o1*omegaCoarse*turbulentViscosityCoarse[k_000]); + calculateMomentsOnSourceNodes( distCoarse, omegaC, k_000, k_M00, k_0M0, k_00M, k_MM0, k_M0M, k_0MM, k_MMM, drho_MPM, vx1_MPM, vx2_MPM, vx3_MPM, kxyFromfcNEQ_MPM, kyzFromfcNEQ_MPM, kxzFromfcNEQ_MPM, kxxMyyFromfcNEQ_MPM, kxxMzzFromfcNEQ_MPM); @@ -404,6 +416,8 @@ __global__ void scaleCF_compressible( k_M0M = neighborZcoarse[k_M0M]; k_0MM = neighborZcoarse[k_0MM]; k_MMM = neighborZcoarse[k_MMM]; + + if(hasTurbulentViscosity) omegaC = omegaCoarse / (c1o1 + c3o1*omegaCoarse*turbulentViscosityCoarse[k_000]); calculateMomentsOnSourceNodes( distCoarse, omegaC, k_000, k_M00, k_0M0, k_00M, k_MM0, k_M0M, k_0MM, k_MMM, drho_MPP, vx1_MPP, vx2_MPP, vx3_MPP, @@ -423,11 +437,12 @@ __global__ void scaleCF_compressible( k_0MM = k_MMM; k_MMM = neighborXcoarse[k_MMM]; + if(hasTurbulentViscosity) omegaC = omegaCoarse / (c1o1 + c3o1*omegaCoarse*turbulentViscosityCoarse[k_000]); + calculateMomentsOnSourceNodes( distCoarse, omegaC, k_000, k_M00, 
k_0M0, k_00M, k_MM0, k_M0M, k_0MM, k_MMM, drho_PPP, vx1_PPP, vx2_PPP, vx3_PPP, kxyFromfcNEQ_PPP, kyzFromfcNEQ_PPP, kxzFromfcNEQ_PPP, kxxMyyFromfcNEQ_PPP, kxxMzzFromfcNEQ_PPP); - ////////////////////////////////////////////////////////////////////////// // source node BNE = PPM ////////////////////////////////////////////////////////////////////////// @@ -440,6 +455,8 @@ __global__ void scaleCF_compressible( k_M00 = neighborXcoarse[k_base_M00]; k_0M0 = k_base_MM0; k_MM0 = neighborXcoarse[k_base_MM0]; + + if(hasTurbulentViscosity) omegaC = omegaCoarse / (c1o1 + c3o1*omegaCoarse*turbulentViscosityCoarse[k_000]); calculateMomentsOnSourceNodes( distCoarse, omegaC, k_000, k_M00, k_0M0, k_00M, k_MM0, k_M0M, k_0MM, k_MMM, drho_PPM, vx1_PPM, vx2_PPM, vx3_PPM, @@ -883,28 +900,6 @@ __global__ void scaleCF_compressible( real y = -c1o4; real z = -c1o4; //////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - - //////////////////////////////////////////////////////////////////////////////// - //! - Set moments (zeroth to sixth order) on destination node - //! - interpolateDistributions( - x, y, z, - m_000, - m_100, m_010, m_001, - m_011, m_101, m_110, m_200, m_020, m_002, - m_111, m_210, m_012, m_201, m_021, m_120, m_102, - m_022, m_202, m_220, m_211, m_121, m_112, - m_122, m_212, m_221, - m_222, - a_000, a_100, a_010, a_001, a_200, a_020, a_002, a_110, a_101, a_011, a_111, - b_000, b_100, b_010, b_001, b_200, b_020, b_002, b_110, b_101, b_011, b_111, - c_000, c_100, c_010, c_001, c_200, c_020, c_002, c_110, c_101, c_011, c_111, - d_000, d_100, d_010, d_001, d_110, d_101, d_011, d_111, - LaplaceRho, eps_new, omegaF, - kxxMyyAverage, kxxMzzAverage, kyzAverage, kxzAverage, kxyAverage - ); - - ////////////////////////////////////////////////////////////////////////// // index of the base node and its neighbors k_base_000 = indicesFineMMM[nodeIndex]; k_base_M00 = neighborXfine [k_base_000]; @@ -924,6 +919,28 @@ __global__ void scaleCF_compressible( k_M0M = k_base_M0M; k_0MM = k_base_0MM; k_MMM = k_base_MMM; + //////////////////////////////////////////////////////////////////////////////// + //! - Set moments (zeroth to sixth order) on destination node + //! + + if(hasTurbulentViscosity) omegaF = omegaFine/ (c1o1 + c3o1*omegaFine*turbulentViscosityFine[k_000]); + + interpolateDistributions( + x, y, z, + m_000, + m_100, m_010, m_001, + m_011, m_101, m_110, m_200, m_020, m_002, + m_111, m_210, m_012, m_201, m_021, m_120, m_102, + m_022, m_202, m_220, m_211, m_121, m_112, + m_122, m_212, m_221, + m_222, + a_000, a_100, a_010, a_001, a_200, a_020, a_002, a_110, a_101, a_011, a_111, + b_000, b_100, b_010, b_001, b_200, b_020, b_002, b_110, b_101, b_011, b_111, + c_000, c_100, c_010, c_001, c_200, c_020, c_002, c_110, c_101, c_011, c_111, + d_000, d_100, d_010, d_001, d_110, d_101, d_011, d_111, + LaplaceRho, eps_new, omegaF, + kxxMyyAverage, kxxMzzAverage, kyzAverage, kxzAverage, kxyAverage + ); ////////////////////////////////////////////////////////////////////////// //! 
- Write distributions: style of reading and writing the distributions from/to @@ -968,9 +985,22 @@ __global__ void scaleCF_compressible( x = -c1o4; y = -c1o4; z = c1o4; + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + // Set neighbor indices + k_000 = k_00M; + k_M00 = k_M0M; + k_0M0 = k_0MM; + k_00M = neighborZfine[k_00M]; + k_MM0 = k_MMM; + k_M0M = neighborZfine[k_M0M]; + k_0MM = neighborZfine[k_0MM]; + k_MMM = neighborZfine[k_MMM]; //////////////////////////////////////////////////////////////////////////////// // Set moments (zeroth to sixth orders) on destination node + + if(hasTurbulentViscosity) omegaF = omegaFine/ (c1o1 + c3o1*omegaFine*turbulentViscosityFine[k_000]); + interpolateDistributions( x, y, z, m_000, @@ -988,17 +1018,6 @@ __global__ void scaleCF_compressible( kxxMyyAverage, kxxMzzAverage, kyzAverage, kxzAverage, kxyAverage ); - //////////////////////////////////////////////////////////////////////////////////// - // Set neighbor indices - k_000 = k_00M; - k_M00 = k_M0M; - k_0M0 = k_0MM; - k_00M = neighborZfine[k_00M]; - k_MM0 = k_MMM; - k_M0M = neighborZfine[k_M0M]; - k_0MM = neighborZfine[k_0MM]; - k_MMM = neighborZfine[k_MMM]; - ////////////////////////////////////////////////////////////////////////// // Write distributions (distFine.f[DIR_000])[k_000] = f_000; @@ -1038,9 +1057,21 @@ __global__ void scaleCF_compressible( y = -c1o4; z = c1o4; //////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + // Set neighbor indices + k_000 = k_M00; + k_M00 = neighborXfine[k_M00]; + k_0M0 = k_MM0; + k_00M = k_M0M; + k_MM0 = neighborXfine[k_MM0]; + k_M0M = neighborXfine[k_M0M]; + k_0MM = k_MMM; + k_MMM = neighborXfine[k_MMM]; //////////////////////////////////////////////////////////////////////////////// // Set moments (zeroth to sixth orders) on destination node + + if(hasTurbulentViscosity) omegaF = omegaFine/ (c1o1 + c3o1*omegaFine*turbulentViscosityFine[k_000]); + interpolateDistributions( x, y, z, m_000, @@ -1058,17 +1089,6 @@ __global__ void scaleCF_compressible( kxxMyyAverage, kxxMzzAverage, kyzAverage, kxzAverage, kxyAverage ); - //////////////////////////////////////////////////////////////////////////////////// - // Set neighbor indices - k_000 = k_M00; - k_M00 = neighborXfine[k_M00]; - k_0M0 = k_MM0; - k_00M = k_M0M; - k_MM0 = neighborXfine[k_MM0]; - k_M0M = neighborXfine[k_M0M]; - k_0MM = k_MMM; - k_MMM = neighborXfine[k_MMM]; - ////////////////////////////////////////////////////////////////////////// // Write distributions (distFine.f[DIR_000])[k_000] = f_000; @@ -1107,9 +1127,22 @@ __global__ void scaleCF_compressible( x = c1o4; y = -c1o4; z = -c1o4; + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + // Set neighbor indices + k_00M = k_000; + k_M0M = k_M00; + k_0MM = k_0M0; + k_MMM = k_MM0; + k_000 = k_base_M00; + k_M00 = neighborXfine[k_base_M00]; + k_0M0 = k_base_MM0; + k_MM0 = neighborXfine[k_base_MM0]; //////////////////////////////////////////////////////////////////////////////// // Set moments (zeroth to sixth orders) on destination node + + if(hasTurbulentViscosity) omegaF = omegaFine/ (c1o1 + c3o1*omegaFine*turbulentViscosityFine[k_000]); + interpolateDistributions( x, y, z, m_000, @@ -1127,17 +1160,6 @@ __global__ void scaleCF_compressible( kxxMyyAverage, kxxMzzAverage, kyzAverage, kxzAverage, kxyAverage ); - 
//////////////////////////////////////////////////////////////////////////////////// - // Set neighbor indices - k_00M = k_000; - k_M0M = k_M00; - k_0MM = k_0M0; - k_MMM = k_MM0; - k_000 = k_base_M00; - k_M00 = neighborXfine[k_base_M00]; - k_0M0 = k_base_MM0; - k_MM0 = neighborXfine[k_base_MM0]; - ////////////////////////////////////////////////////////////////////////// // Write distributions (distFine.f[DIR_000])[k_000] = f_000; @@ -1177,25 +1199,6 @@ __global__ void scaleCF_compressible( y = c1o4; z = -c1o4; - //////////////////////////////////////////////////////////////////////////////// - // Set moments (zeroth to sixth orders) on destination node - interpolateDistributions( - x, y, z, - m_000, - m_100, m_010, m_001, - m_011, m_101, m_110, m_200, m_020, m_002, - m_111, m_210, m_012, m_201, m_021, m_120, m_102, - m_022, m_202, m_220, m_211, m_121, m_112, - m_122, m_212, m_221, - m_222, - a_000, a_100, a_010, a_001, a_200, a_020, a_002, a_110, a_101, a_011, a_111, - b_000, b_100, b_010, b_001, b_200, b_020, b_002, b_110, b_101, b_011, b_111, - c_000, c_100, c_010, c_001, c_200, c_020, c_002, c_110, c_101, c_011, c_111, - d_000, d_100, d_010, d_001, d_110, d_101, d_011, d_111, - LaplaceRho, eps_new, omegaF, - kxxMyyAverage, kxxMzzAverage, kyzAverage, kxzAverage, kxyAverage - ); - ////////////////////////////////////////////////////////////////////////// // index of the base node and its neighbors k_base_000 = k_base_0M0; @@ -1218,6 +1221,28 @@ __global__ void scaleCF_compressible( k_0MM = k_base_0MM; k_MMM = k_base_MMM; + //////////////////////////////////////////////////////////////////////////////// + // Set moments (zeroth to sixth orders) on destination node + + if(hasTurbulentViscosity) omegaF = omegaFine/ (c1o1 + c3o1*omegaFine*turbulentViscosityFine[k_000]); + + interpolateDistributions( + x, y, z, + m_000, + m_100, m_010, m_001, + m_011, m_101, m_110, m_200, m_020, m_002, + m_111, m_210, m_012, m_201, m_021, m_120, m_102, + m_022, m_202, m_220, m_211, m_121, m_112, + m_122, m_212, m_221, + m_222, + a_000, a_100, a_010, a_001, a_200, a_020, a_002, a_110, a_101, a_011, a_111, + b_000, b_100, b_010, b_001, b_200, b_020, b_002, b_110, b_101, b_011, b_111, + c_000, c_100, c_010, c_001, c_200, c_020, c_002, c_110, c_101, c_011, c_111, + d_000, d_100, d_010, d_001, d_110, d_101, d_011, d_111, + LaplaceRho, eps_new, omegaF, + kxxMyyAverage, kxxMzzAverage, kyzAverage, kxzAverage, kxyAverage + ); + ////////////////////////////////////////////////////////////////////////// // Write distributions (distFine.f[DIR_000])[k_000] = f_000; @@ -1256,9 +1281,22 @@ __global__ void scaleCF_compressible( x = -c1o4; y = c1o4; z = c1o4; + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + // Set neighbor indices + k_000 = k_00M; + k_M00 = k_M0M; + k_0M0 = k_0MM; + k_00M = neighborZfine[k_00M]; + k_MM0 = k_MMM; + k_M0M = neighborZfine[k_M0M]; + k_0MM = neighborZfine[k_0MM]; + k_MMM = neighborZfine[k_MMM]; //////////////////////////////////////////////////////////////////////////////// // Set moments (zeroth to sixth orders) on destination node + + if(hasTurbulentViscosity) omegaF = omegaFine/ (c1o1 + c3o1*omegaFine*turbulentViscosityFine[k_000]); + interpolateDistributions( x, y, z, m_000, @@ -1276,17 +1314,6 @@ __global__ void scaleCF_compressible( kxxMyyAverage, kxxMzzAverage, kyzAverage, kxzAverage, kxyAverage ); - //////////////////////////////////////////////////////////////////////////////////// - // Set neighbor indices - k_000 = 
k_00M; - k_M00 = k_M0M; - k_0M0 = k_0MM; - k_00M = neighborZfine[k_00M]; - k_MM0 = k_MMM; - k_M0M = neighborZfine[k_M0M]; - k_0MM = neighborZfine[k_0MM]; - k_MMM = neighborZfine[k_MMM]; - ////////////////////////////////////////////////////////////////////////// // Write distributions (distFine.f[DIR_000])[k_000] = f_000; @@ -1325,9 +1352,22 @@ __global__ void scaleCF_compressible( x = c1o4; y = c1o4; z = c1o4; + //////////////////////////////////////////////////////////////////////////////////// + // Set neighbor indices + k_000 = k_M00; + k_M00 = neighborXfine[k_M00]; + k_0M0 = k_MM0; + k_00M = k_M0M; + k_MM0 = neighborXfine[k_MM0]; + k_M0M = neighborXfine[k_M0M]; + k_0MM = k_MMM; + k_MMM = neighborXfine[k_MMM]; //////////////////////////////////////////////////////////////////////////////// // Set moments (zeroth to sixth orders) on destination node + + if(hasTurbulentViscosity) omegaF = omegaFine/ (c1o1 + c3o1*omegaFine*turbulentViscosityFine[k_000]); + interpolateDistributions( x, y, z, m_000, @@ -1345,17 +1385,6 @@ __global__ void scaleCF_compressible( kxxMyyAverage, kxxMzzAverage, kyzAverage, kxzAverage, kxyAverage ); - //////////////////////////////////////////////////////////////////////////////////// - // Set neighbor indices - k_000 = k_M00; - k_M00 = neighborXfine[k_M00]; - k_0M0 = k_MM0; - k_00M = k_M0M; - k_MM0 = neighborXfine[k_MM0]; - k_M0M = neighborXfine[k_M0M]; - k_0MM = k_MMM; - k_MMM = neighborXfine[k_MMM]; - ////////////////////////////////////////////////////////////////////////// // Write distributions (distFine.f[DIR_000])[k_000] = f_000; @@ -1394,9 +1423,22 @@ __global__ void scaleCF_compressible( x = c1o4; y = c1o4; z = -c1o4; + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + // Set neighbor indices + k_00M = k_000; + k_M0M = k_M00; + k_0MM = k_0M0; + k_MMM = k_MM0; + k_000 = k_base_M00; + k_M00 = neighborXfine[k_base_M00]; + k_0M0 = k_base_MM0; + k_MM0 = neighborXfine[k_base_MM0]; //////////////////////////////////////////////////////////////////////////////// // Set moments (zeroth to sixth orders) on destination node + + if(hasTurbulentViscosity) omegaF = omegaFine/ (c1o1 + c3o1*omegaFine*turbulentViscosityFine[k_000]); + interpolateDistributions( x, y, z, m_000, @@ -1414,17 +1456,6 @@ __global__ void scaleCF_compressible( kxxMyyAverage, kxxMzzAverage, kyzAverage, kxzAverage, kxyAverage ); - //////////////////////////////////////////////////////////////////////////////////// - // Set neighbor indices - k_00M = k_000; - k_M0M = k_M00; - k_0MM = k_0M0; - k_MMM = k_MM0; - k_000 = k_base_M00; - k_M00 = neighborXfine[k_base_M00]; - k_0M0 = k_base_MM0; - k_MM0 = neighborXfine[k_base_MM0]; - ////////////////////////////////////////////////////////////////////////// // Write distributions (distFine.f[DIR_000])[k_000] = f_000; @@ -1455,3 +1486,7 @@ __global__ void scaleCF_compressible( (distFine.f[DIR_PMM])[k_0MM] = f_PMM; (distFine.f[DIR_MMM])[k_MMM] = f_MMM; } + +template __global__ void scaleCF_compressible<true>( real* distributionsCoarse, real* distributionsFine, unsigned int* neighborXcoarse, unsigned int* neighborYcoarse, unsigned int* neighborZcoarse, unsigned int* neighborXfine, unsigned int* neighborYfine, unsigned int* neighborZfine, unsigned long long numberOfLBnodesCoarse, unsigned long long numberOfLBnodesFine, bool isEvenTimestep, unsigned int* indicesCoarseMMM, unsigned int* indicesFineMMM, unsigned int numberOfInterfaceNodes, real omegaCoarse, real omegaFine, real* 
turbulentViscosityCoarse, real* turbulentViscosityFine, OffCF offsetCF); + +template __global__ void scaleCF_compressible<false>( real* distributionsCoarse, real* distributionsFine, unsigned int* neighborXcoarse, unsigned int* neighborYcoarse, unsigned int* neighborZcoarse, unsigned int* neighborXfine, unsigned int* neighborYfine, unsigned int* neighborZfine, unsigned long long numberOfLBnodesCoarse, unsigned long long numberOfLBnodesFine, bool isEvenTimestep, unsigned int* indicesCoarseMMM, unsigned int* indicesFineMMM, unsigned int numberOfInterfaceNodes, real omegaCoarse, real omegaFine, real* turbulentViscosityCoarse, real* turbulentViscosityFine, OffCF offsetCF); \ No newline at end of file diff --git a/src/gpu/VirtualFluids_GPU/GPU/GridScaling/scaleFC_compressible.cu b/src/gpu/VirtualFluids_GPU/GPU/GridScaling/scaleFC_compressible.cu index e7d999d108e59bca98bf87b813f9479f1c601266..c89a524c1dd63f426254c395d1e4881a7e96ce7a 100644 --- a/src/gpu/VirtualFluids_GPU/GPU/GridScaling/scaleFC_compressible.cu +++ b/src/gpu/VirtualFluids_GPU/GPU/GridScaling/scaleFC_compressible.cu @@ -46,7 +46,7 @@ using namespace vf::gpu; //! // based on scaleFC_RhoSq_comp_27 -__global__ void scaleFC_compressible( +template<bool hasTurbulentViscosity> __global__ void scaleFC_compressible( real *distributionsCoarse, real *distributionsFine, unsigned int *neighborXcoarse, @@ -63,6 +63,8 @@ __global__ void scaleFC_compressible( unsigned int numberOfInterfaceNodes, real omegaCoarse, real omegaFine, + real* turbulentViscosityCoarse, + real* turbulentViscosityFine, OffFC offsetFC) { //////////////////////////////////////////////////////////////////////////////// @@ -138,6 +140,8 @@ __global__ void scaleFC_compressible( unsigned int k_0MM = k_base_0MM; unsigned int k_MMM = k_base_MMM; + if(hasTurbulentViscosity) omegaF = omegaFine/ (c1o1 + c3o1*omegaFine*turbulentViscosityFine[k_000]); + calculateMomentsOnSourceNodes( distFine, omegaF, k_000, k_M00, k_0M0, k_00M, k_MM0, k_M0M, k_0MM, k_MMM, drho_MMM, vx1_MMM, vx2_MMM, vx3_MMM, kxyFromfcNEQ_MMM, kyzFromfcNEQ_MMM, kxzFromfcNEQ_MMM, kxxMyyFromfcNEQ_MMM, kxxMzzFromfcNEQ_MMM); @@ -155,6 +159,8 @@ __global__ void scaleFC_compressible( k_0MM = neighborZfine[k_0MM]; k_MMM = neighborZfine[k_MMM]; + if(hasTurbulentViscosity) omegaF = omegaFine/ (c1o1 + c3o1*omegaFine*turbulentViscosityFine[k_000]); + calculateMomentsOnSourceNodes( distFine, omegaF, k_000, k_M00, k_0M0, k_00M, k_MM0, k_M0M, k_0MM, k_MMM, drho_MMP, vx1_MMP, vx2_MMP, vx3_MMP, kxyFromfcNEQ_MMP, kyzFromfcNEQ_MMP, kxzFromfcNEQ_MMP, kxxMyyFromfcNEQ_MMP, kxxMzzFromfcNEQ_MMP); @@ -172,6 +178,8 @@ __global__ void scaleFC_compressible( k_0MM = k_MMM; k_MMM = neighborXfine[k_MMM]; + if(hasTurbulentViscosity) omegaF = omegaFine/ (c1o1 + c3o1*omegaFine*turbulentViscosityFine[k_000]); + calculateMomentsOnSourceNodes( distFine, omegaF, k_000, k_M00, k_0M0, k_00M, k_MM0, k_M0M, k_0MM, k_MMM, drho_PMP, vx1_PMP, vx2_PMP, vx3_PMP, kxyFromfcNEQ_PMP, kyzFromfcNEQ_PMP, kxzFromfcNEQ_PMP, kxxMyyFromfcNEQ_PMP, kxxMzzFromfcNEQ_PMP); @@ -189,6 +197,8 @@ __global__ void scaleFC_compressible( k_0M0 = k_base_MM0; k_MM0 = neighborXfine[k_base_MM0]; + if(hasTurbulentViscosity) omegaF = omegaFine/ (c1o1 + c3o1*omegaFine*turbulentViscosityFine[k_000]); + calculateMomentsOnSourceNodes( distFine, omegaF, k_000, k_M00, k_0M0, k_00M, k_MM0, k_M0M, k_0MM, k_MMM, drho_PMM, vx1_PMM, vx2_PMM, vx3_PMM, kxyFromfcNEQ_PMM, kyzFromfcNEQ_PMM, kxzFromfcNEQ_PMM, kxxMyyFromfcNEQ_PMM, kxxMzzFromfcNEQ_PMM); @@ -216,6 +226,8 @@ __global__ void scaleFC_compressible( 
k_0MM = k_base_0MM; k_MMM = k_base_MMM; + if(hasTurbulentViscosity) omegaF = omegaFine/ (c1o1 + c3o1*omegaFine*turbulentViscosityFine[k_000]); + calculateMomentsOnSourceNodes( distFine, omegaF, k_000, k_M00, k_0M0, k_00M, k_MM0, k_M0M, k_0MM, k_MMM, drho_MPM, vx1_MPM, vx2_MPM, vx3_MPM, kxyFromfcNEQ_MPM, kyzFromfcNEQ_MPM, kxzFromfcNEQ_MPM, kxxMyyFromfcNEQ_MPM, kxxMzzFromfcNEQ_MPM); @@ -232,6 +244,8 @@ __global__ void scaleFC_compressible( k_M0M = neighborZfine[k_M0M]; k_0MM = neighborZfine[k_0MM]; k_MMM = neighborZfine[k_MMM]; + + if(hasTurbulentViscosity) omegaF = omegaFine/ (c1o1 + c3o1*omegaFine*turbulentViscosityFine[k_000]); calculateMomentsOnSourceNodes( distFine, omegaF, k_000, k_M00, k_0M0, k_00M, k_MM0, k_M0M, k_0MM, k_MMM, drho_MPP, vx1_MPP, vx2_MPP, vx3_MPP, @@ -250,6 +264,8 @@ __global__ void scaleFC_compressible( k_0MM = k_MMM; k_MMM = neighborXfine[k_MMM]; + if(hasTurbulentViscosity) omegaF = omegaFine/ (c1o1 + c3o1*omegaFine*turbulentViscosityFine[k_000]); + calculateMomentsOnSourceNodes( distFine, omegaF, k_000, k_M00, k_0M0, k_00M, k_MM0, k_M0M, k_0MM, k_MMM, drho_PPP, vx1_PPP, vx2_PPP, vx3_PPP, kxyFromfcNEQ_PPP, kyzFromfcNEQ_PPP, kxzFromfcNEQ_PPP, kxxMyyFromfcNEQ_PPP, kxxMzzFromfcNEQ_PPP); @@ -267,6 +283,8 @@ __global__ void scaleFC_compressible( k_0M0 = k_base_MM0; k_MM0 = neighborXfine[k_base_MM0]; + if(hasTurbulentViscosity) omegaF = omegaFine/ (c1o1 + c3o1*omegaFine*turbulentViscosityFine[k_000]); + calculateMomentsOnSourceNodes( distFine, omegaF, k_000, k_M00, k_0M0, k_00M, k_MM0, k_M0M, k_0MM, k_MMM, drho_PPM, vx1_PPM, vx2_PPM, vx3_PPM, kxyFromfcNEQ_PPM, kyzFromfcNEQ_PPM, kxzFromfcNEQ_PPM, kxxMyyFromfcNEQ_PPM, kxxMzzFromfcNEQ_PPM); @@ -540,6 +558,18 @@ __global__ void scaleFC_compressible( // y = 0.; // z = 0.; //////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + // index of the destination node and its neighbors + k_000 = indicesCoarse000[nodeIndex]; + k_M00 = neighborXcoarse [k_000]; + k_0M0 = neighborYcoarse [k_000]; + k_00M = neighborZcoarse [k_000]; + k_MM0 = neighborYcoarse [k_M00]; + k_M0M = neighborZcoarse [k_M00]; + k_0MM = neighborZcoarse [k_0M0]; + k_MMM = neighborZcoarse [k_MM0]; + //////////////////////////////////////////////////////////////////////////////////// + + if(hasTurbulentViscosity) omegaC = omegaCoarse / (c1o1 + c3o1*omegaCoarse*turbulentViscosityCoarse[k_000]); //////////////////////////////////////////////////////////////////////////////// //! - Set macroscopic values on destination node (zeroth and first order moments) @@ -642,19 +672,6 @@ __global__ void scaleFC_compressible( backwardInverseChimeraWithK(m_210, m_211, m_212, vvz, vz_sq, c9o1, c1o9); backwardInverseChimeraWithK(m_220, m_221, m_222, vvz, vz_sq, c36o1, c1o36); - - //////////////////////////////////////////////////////////////////////////////////// - // index of the destination node and its neighbors - k_000 = indicesCoarse000[nodeIndex]; - k_M00 = neighborXcoarse [k_000]; - k_0M0 = neighborYcoarse [k_000]; - k_00M = neighborZcoarse [k_000]; - k_MM0 = neighborYcoarse [k_M00]; - k_M0M = neighborZcoarse [k_M00]; - k_0MM = neighborZcoarse [k_0M0]; - k_MMM = neighborZcoarse [k_MM0]; - //////////////////////////////////////////////////////////////////////////////////// - //////////////////////////////////////////////////////////////////////////////////// //! - Write distributions: style of reading and writing the distributions from/to //! 
stored arrays dependent on timestep is based on the esoteric twist algorithm @@ -690,3 +707,7 @@ __global__ void scaleFC_compressible( (distCoarse.f[DIR_MMM])[k_MMM] = f_MMM; //////////////////////////////////////////////////////////////////////////////////// } + +template __global__ void scaleFC_compressible<true>( real *distributionsCoarse, real *distributionsFine, unsigned int *neighborXcoarse, unsigned int *neighborYcoarse, unsigned int *neighborZcoarse, unsigned int *neighborXfine, unsigned int *neighborYfine, unsigned int *neighborZfine, unsigned long long numberOfLBnodesCoarse, unsigned long long numberOfLBnodesFine, bool isEvenTimestep, unsigned int *indicesCoarse000, unsigned int *indicesFineMMM, unsigned int numberOfInterfaceNodes, real omegaCoarse, real omegaFine, real* turbulentViscosityCoarse, real* turbulentViscosityFine, OffFC offsetFC); + +template __global__ void scaleFC_compressible<false>( real *distributionsCoarse, real *distributionsFine, unsigned int *neighborXcoarse, unsigned int *neighborYcoarse, unsigned int *neighborZcoarse, unsigned int *neighborXfine, unsigned int *neighborYfine, unsigned int *neighborZfine, unsigned long long numberOfLBnodesCoarse, unsigned long long numberOfLBnodesFine, bool isEvenTimestep, unsigned int *indicesCoarse000, unsigned int *indicesFineMMM, unsigned int numberOfInterfaceNodes, real omegaCoarse, real omegaFine, real* turbulentViscosityCoarse, real* turbulentViscosityFine, OffFC offsetFC); \ No newline at end of file diff --git a/src/gpu/VirtualFluids_GPU/GPU/LBMKernel.cu b/src/gpu/VirtualFluids_GPU/GPU/LBMKernel.cu index 4faea21102b6a68dd9a0aa30e9cecc7eba6051b0..9abac27969e74dc90ecdcc707f4fcb2234010d07 100644 --- a/src/gpu/VirtualFluids_GPU/GPU/LBMKernel.cu +++ b/src/gpu/VirtualFluids_GPU/GPU/LBMKernel.cu @@ -4054,12 +4054,12 @@ void ScaleCF_RhoSq_comp_27(LBMSimulationParameter * parameterDeviceC, LBMSimulat getLastCudaError("scaleCF_RhoSq_27 execution failed"); } -void ScaleCF_compressible(LBMSimulationParameter * parameterDeviceC, LBMSimulationParameter* parameterDeviceF, ICellCF * icellCF, OffCF& offsetCF, CUstream_st *stream) +template<bool hasTurbulentViscosity> void ScaleCF_compressible(LBMSimulationParameter * parameterDeviceC, LBMSimulationParameter* parameterDeviceF, ICellCF * icellCF, OffCF& offsetCF, CUstream_st *stream) { dim3 grid = vf::cuda::getCudaGrid(parameterDeviceC->numberofthreads, icellCF->kCF); dim3 threads(parameterDeviceC->numberofthreads, 1, 1 ); - scaleCF_compressible<<<grid, threads, 0, stream>>>( + scaleCF_compressible<hasTurbulentViscosity><<<grid, threads, 0, stream>>>( parameterDeviceC->distributions.f[0], parameterDeviceF->distributions.f[0], parameterDeviceC->neighborX, @@ -4076,9 +4076,14 @@ void ScaleCF_compressible(LBMSimulationParameter * parameterDeviceC, LBMSimulati icellCF->kCF, parameterDeviceC->omega, parameterDeviceF->omega, + parameterDeviceC->turbViscosity, + parameterDeviceF->turbViscosity, offsetCF); + getLastCudaError("scaleCF_compressible execution failed"); } +template void ScaleCF_compressible<true>(LBMSimulationParameter * parameterDeviceC, LBMSimulationParameter* parameterDeviceF, ICellCF * icellCF, OffCF& offsetCF, CUstream_st *stream); +template void ScaleCF_compressible<false>(LBMSimulationParameter * parameterDeviceC, LBMSimulationParameter* parameterDeviceF, ICellCF * icellCF, OffCF& offsetCF, CUstream_st *stream); ////////////////////////////////////////////////////////////////////////// void ScaleCF_RhoSq_3rdMom_comp_27( @@ -4946,12 +4951,12 @@ void 
ScaleFC_RhoSq_comp_27(LBMSimulationParameter * parameterDeviceC, LBMSimulat getLastCudaError("scaleFC_RhoSq_comp_27 execution failed"); } ////////////////////////////////////////////////////////////////////////// -void ScaleFC_compressible(LBMSimulationParameter * parameterDeviceC, LBMSimulationParameter* parameterDeviceF, ICellFC * icellFC, OffFC &offsetFC, CUstream_st *stream) +template<bool hasTurbulentViscosity> void ScaleFC_compressible(LBMSimulationParameter * parameterDeviceC, LBMSimulationParameter* parameterDeviceF, ICellFC * icellFC, OffFC &offsetFC, CUstream_st *stream) { dim3 grid = vf::cuda::getCudaGrid(parameterDeviceC->numberofthreads, icellFC->kFC); dim3 threads(parameterDeviceC->numberofthreads, 1, 1 ); - scaleFC_compressible<<<grid, threads, 0, stream>>>( + scaleFC_compressible<hasTurbulentViscosity><<<grid, threads, 0, stream>>>( parameterDeviceC->distributions.f[0], parameterDeviceF->distributions.f[0], parameterDeviceC->neighborX, @@ -4968,9 +4973,15 @@ void ScaleFC_compressible(LBMSimulationParameter * parameterDeviceC, LBMSimulati icellFC->kFC, parameterDeviceC->omega, parameterDeviceF->omega, + parameterDeviceC->turbViscosity, + parameterDeviceF->turbViscosity, offsetFC); + getLastCudaError("scaleFC_compressible execution failed"); } +template void ScaleFC_compressible<true>(LBMSimulationParameter * parameterDeviceC, LBMSimulationParameter* parameterDeviceF, ICellFC * icellFC, OffFC &offsetFC, CUstream_st *stream); +template void ScaleFC_compressible<false>(LBMSimulationParameter * parameterDeviceC, LBMSimulationParameter* parameterDeviceF, ICellFC * icellFC, OffFC &offsetFC, CUstream_st *stream); + ////////////////////////////////////////////////////////////////////////// void ScaleFC_RhoSq_3rdMom_comp_27( real* DC, diff --git a/src/gpu/VirtualFluids_GPU/GPU/SlipBCs27.cu b/src/gpu/VirtualFluids_GPU/GPU/SlipBCs27.cu index cc8ca53d15ac02686b850a70ab181bb47285a7d1..80e3c273987092aa63e4a2724df0df3df7152145 100644 --- a/src/gpu/VirtualFluids_GPU/GPU/SlipBCs27.cu +++ b/src/gpu/VirtualFluids_GPU/GPU/SlipBCs27.cu @@ -32,6 +32,7 @@ //====================================================================================== #include "LBM/LB.h" #include "lbm/constants/D3Q27.h" +#include "Kernel/Utilities/DistributionHelper.cuh" #include "lbm/constants/NumericConstants.h" #include "LBM/GPUHelperFunctions/KernelUtilities.h" @@ -52,67 +53,8 @@ __global__ void QSlipDevice27( unsigned long long numberOfLBnodes, bool isEvenTimestep) { - Distributions27 D; - if (isEvenTimestep==true) - { - D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes]; - D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes]; - D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes]; - D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes]; - D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes]; - D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes]; - D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes]; - D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes]; - D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes]; - D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes]; - D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes]; - D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes]; - D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes]; - D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes]; - D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes]; - D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes]; - D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes]; - D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes]; - D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes]; - D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes]; - D.f[DIR_MMP] = &DD[DIR_MMP * 
numberOfLBnodes]; - D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes]; - D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes]; - D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes]; - D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes]; - D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes]; - D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes]; - } - else - { - D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes]; - D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes]; - D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes]; - D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes]; - D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes]; - D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes]; - D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes]; - D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes]; - D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes]; - D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes]; - D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes]; - D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes]; - D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes]; - D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes]; - D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes]; - D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes]; - D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes]; - D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes]; - D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes]; - D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes]; - D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes]; - D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes]; - D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes]; - D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes]; - D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes]; - D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes]; - D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes]; - } + Distributions27 D = vf::gpu::getDistributionReferences27(DD, numberOfLBnodes, isEvenTimestep); + //////////////////////////////////////////////////////////////////////////////// const unsigned x = threadIdx.x; // Globaler x-Index const unsigned y = blockIdx.x; // Globaler y-Index @@ -237,66 +179,8 @@ __global__ void QSlipDevice27( real cu_sq=c3o2*(vx1*vx1+vx2*vx2+vx3*vx3); ////////////////////////////////////////////////////////////////////////// - if (isEvenTimestep==false) - { - D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes]; - D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes]; - D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes]; - D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes]; - D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes]; - D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes]; - D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes]; - D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes]; - D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes]; - D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes]; - D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes]; - D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes]; - D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes]; - D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes]; - D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes]; - D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes]; - D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes]; - D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes]; - D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes]; - D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes]; - D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes]; - D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes]; - D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes]; - D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes]; - D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes]; - D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes]; - D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes]; - } - else - { - D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes]; - D.f[DIR_P00] = 
&DD[DIR_M00 * numberOfLBnodes]; - D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes]; - D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes]; - D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes]; - D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes]; - D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes]; - D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes]; - D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes]; - D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes]; - D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes]; - D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes]; - D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes]; - D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes]; - D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes]; - D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes]; - D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes]; - D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes]; - D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes]; - D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes]; - D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes]; - D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes]; - D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes]; - D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes]; - D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes]; - D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes]; - D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes]; - } + + D = vf::gpu::getDistributionReferences27(DD, numberOfLBnodes, !isEvenTimestep); //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// //Test //(D.f[DIR_000])[k]=c1o10; @@ -1248,9 +1132,7 @@ __global__ void BBSlipDeviceComp27( //! - Read distributions: style of reading and writing the distributions from/to stored arrays dependent on timestep is based on the esoteric twist algorithm \ref //! <a href="https://doi.org/10.3390/computation5020019"><b>[ M. Geier et al. (2017), DOI:10.3390/computation5020019 ]</b></a> //! - Distributions27 dist; - getPointersToDistributions(dist, distributions, numberOfLBnodes, isEvenTimestep); - + Distributions27 dist = vf::gpu::getDistributionReferences27(distributions, numberOfLBnodes, isEvenTimestep); //////////////////////////////////////////////////////////////////////////////// //! - Set local subgrid distances (q's) //! @@ -1338,13 +1220,13 @@ __global__ void BBSlipDeviceComp27( (-(f_BN - f_TS) + (f_TN - f_BS)) + ((f_TE - f_BW) - (f_BE - f_TW)) + (f_T - f_B)) / (c1o1 + drho); - real cu_sq = c3o2 * (vx1 * vx1 + vx2 * vx2 + vx3 * vx3) * (c1o1 + drho); + // real cu_sq = c3o2 * (vx1 * vx1 + vx2 * vx2 + vx3 * vx3) * (c1o1 + drho); //////////////////////////////////////////////////////////////////////////////// //! - change the pointer to write the results in the correct array //! - getPointersToDistributions(dist, distributions, numberOfLBnodes, !isEvenTimestep); + dist = vf::gpu::getDistributionReferences27(distributions, numberOfLBnodes, !isEvenTimestep); //////////////////////////////////////////////////////////////////////////////// //! - Multiply the local velocities by the slipLength //! 
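The SlipBCs27.cu hunks above (and the StressBCs27.cu hunks below) replace the hand-written even/odd pointer tables with calls to vf::gpu::getDistributionReferences27. For orientation, here is a minimal sketch of what such a helper computes, reconstructed from the removed lines; the loop form, the 0..26 enumeration of the DIR_* constants, and the getInverseDirection() lookup are assumptions for illustration, not the repository's implementation:

// Sketch only (assumed semantics): build the 27 distribution pointers for the esoteric-twist
// storage scheme. On even timesteps each direction reads/writes its own slice of DD; on odd
// timesteps it uses the slice of the opposite direction, with DIR_000 mapping to itself.
// getInverseDirection() is a hypothetical lookup standing in for the explicit table the
// removed code wrote out by hand.
__inline__ __device__ Distributions27 getDistributionReferences27Sketch(real* DD, unsigned long long numberOfLBnodes, bool isEvenTimestep)
{
    Distributions27 D;
    for (unsigned int dir = 0; dir < 27; ++dir)
    {
        const unsigned int slice = isEvenTimestep ? dir : getInverseDirection(dir); // hypothetical inverse lookup
        D.f[dir] = &DD[slice * numberOfLBnodes];
    }
    return D;
}

Calling the helper a second time with !isEvenTimestep, as the kernels above do before writing, reproduces the second pointer block that was previously duplicated in every boundary-condition kernel.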
diff --git a/src/gpu/VirtualFluids_GPU/GPU/StressBCs27.cu b/src/gpu/VirtualFluids_GPU/GPU/StressBCs27.cu index 3208299e93940dabe52faa7d0b3c684c45596660..0838402693e469efb10be2f9cd59094107383b66 100644 --- a/src/gpu/VirtualFluids_GPU/GPU/StressBCs27.cu +++ b/src/gpu/VirtualFluids_GPU/GPU/StressBCs27.cu @@ -42,6 +42,7 @@ #include "LBM/LB.h" #include "lbm/constants/D3Q27.h" +#include "Kernel/Utilities/DistributionHelper.cuh" #include <lbm/constants/NumericConstants.h> #include "LBM/GPUHelperFunctions/KernelUtilities.h" @@ -107,7 +108,7 @@ __host__ __device__ __forceinline__ void iMEM( real _vz_w = vz_w_inst-vDotN_w*wallNormalZ; //Compute wall shear stress tau_w via MOST - real z = (real)samplingOffset[k] + 0.5; //assuming q=0.5, could be replaced by wall distance via wall normal + real z = (real)samplingOffset[k] + q; //assuming q=0.5, could be replaced by wall distance via wall normal real kappa = 0.4; real u_star = vMag_el*kappa/(log(z/z0[k])); if(hasWallModelMonitor) u_star_monitor[k] = u_star; @@ -137,6 +138,7 @@ __host__ __device__ __forceinline__ void iMEM( wallVelocityZ = clipVz > -clipVz? min(clipVz, max(-clipVz, -3.0*F_z*forceFactor)): max(clipVz, min(-clipVz, -3.0*F_z*forceFactor)); } + ////////////////////////////////////////////////////////////////////////////// __global__ void QStressDeviceComp27( real* DD, @@ -172,67 +174,8 @@ __global__ void QStressDeviceComp27( bool isEvenTimestep) { - Distributions27 D; - if (isEvenTimestep==true)//get right array of post coll f's - { - D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes]; - D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes]; - D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes]; - D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes]; - D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes]; - D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes]; - D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes]; - D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes]; - D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes]; - D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes]; - D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes]; - D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes]; - D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes]; - D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes]; - D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes]; - D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes]; - D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes]; - D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes]; - D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes]; - D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes]; - D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes]; - D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes]; - D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes]; - D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes]; - D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes]; - D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes]; - D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes]; - } - else - { - D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes]; - D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes]; - D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes]; - D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes]; - D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes]; - D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes]; - D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes]; - D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes]; - D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes]; - D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes]; - D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes]; - D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes]; - D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes]; - D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes]; - D.f[DIR_0MM] = 
&DD[DIR_0PP * numberOfLBnodes]; - D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes]; - D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes]; - D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes]; - D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes]; - D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes]; - D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes]; - D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes]; - D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes]; - D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes]; - D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes]; - D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes]; - D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes]; - } + Distributions27 D = vf::gpu::getDistributionReferences27(DD, numberOfLBnodes, isEvenTimestep); + //////////////////////////////////////////////////////////////////////////////// const unsigned x = threadIdx.x; // Globaler x-Index const unsigned y = blockIdx.x; // Globaler y-Index @@ -362,66 +305,8 @@ __global__ void QStressDeviceComp27( real om_turb = om1 / (c1o1 + c3o1*om1*max(c0o1, turbViscosity[k_Q[k]])); ////////////////////////////////////////////////////////////////////////// - if (isEvenTimestep==false) //get adress where incoming f's should be written to - { - D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes]; - D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes]; - D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes]; - D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes]; - D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes]; - D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes]; - D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes]; - D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes]; - D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes]; - D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes]; - D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes]; - D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes]; - D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes]; - D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes]; - D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes]; - D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes]; - D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes]; - D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes]; - D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes]; - D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes]; - D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes]; - D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes]; - D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes]; - D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes]; - D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes]; - D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes]; - D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes]; - } - else - { - D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes]; - D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes]; - D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes]; - D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes]; - D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes]; - D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes]; - D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes]; - D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes]; - D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes]; - D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes]; - D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes]; - D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes]; - D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes]; - D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes]; - D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes]; - D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes]; - D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes]; - D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes]; - D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes]; - D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes]; - D.f[DIR_MMP] = &DD[DIR_PPM * 
numberOfLBnodes]; - D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes]; - D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes]; - D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes]; - D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes]; - D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes]; - D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes]; - } + + D = vf::gpu::getDistributionReferences27(DD, numberOfLBnodes, !isEvenTimestep); //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// //Compute incoming f's with zero wall velocity /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// @@ -437,7 +322,8 @@ __global__ void QStressDeviceComp27( { velocityLB = vx1; feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c2o27); - f_W_in = getInterpolatedDistributionForNoSlipBC(q, f_E, f_W, feq, om_turb); + // f_W_in = getInterpolatedDistributionForNoSlipBC(q, f_E, f_W, feq, om_turb); + f_W_in = getInterpolatedDistributionForNoSlipWithPressureBC(q, f_E, f_W, feq, om_turb, drho, c2o27); wallMomentumX += f_E+f_W_in; } @@ -446,7 +332,8 @@ __global__ void QStressDeviceComp27( { velocityLB = -vx1; feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c2o27); - f_E_in = getInterpolatedDistributionForNoSlipBC(q, f_W, f_E, feq, om_turb); + // f_E_in = getInterpolatedDistributionForNoSlipBC(q, f_W, f_E, feq, om_turb); + f_E_in = getInterpolatedDistributionForNoSlipWithPressureBC(q, f_W, f_E, feq, om_turb, drho, c2o27); wallMomentumX -= f_W+f_E_in; } @@ -455,7 +342,8 @@ __global__ void QStressDeviceComp27( { velocityLB = vx2; feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c2o27); - f_S_in = getInterpolatedDistributionForNoSlipBC(q, f_N, f_S, feq, om_turb); + // f_S_in = getInterpolatedDistributionForNoSlipBC(q, f_N, f_S, feq, om_turb); + f_S_in = getInterpolatedDistributionForNoSlipWithPressureBC(q, f_N, f_S, feq, om_turb, drho, c2o27); wallMomentumY += f_N+f_S_in; } @@ -464,7 +352,8 @@ __global__ void QStressDeviceComp27( { velocityLB = -vx2; feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c2o27); - f_N_in = getInterpolatedDistributionForNoSlipBC(q, f_S, f_N, feq, om_turb); + // f_N_in = getInterpolatedDistributionForNoSlipBC(q, f_S, f_N, feq, om_turb); + f_N_in = getInterpolatedDistributionForNoSlipWithPressureBC(q, f_S, f_N, feq, om_turb, drho, c2o27); wallMomentumY -= f_S+f_N_in; } @@ -473,7 +362,8 @@ __global__ void QStressDeviceComp27( { velocityLB = vx3; feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c2o27); - f_B_in = getInterpolatedDistributionForNoSlipBC(q, f_T, f_B, feq, om_turb); + // f_B_in = getInterpolatedDistributionForNoSlipBC(q, f_T, f_B, feq, om_turb); + f_B_in = getInterpolatedDistributionForNoSlipWithPressureBC(q, f_T, f_B, feq, om_turb, drho, c2o27); wallMomentumZ += f_T+f_B_in; } @@ -482,7 +372,8 @@ __global__ void QStressDeviceComp27( { velocityLB = -vx3; feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c2o27); - f_T_in = getInterpolatedDistributionForNoSlipBC(q, f_B, f_T, feq, om_turb); + // f_T_in = getInterpolatedDistributionForNoSlipBC(q, f_B, f_T, feq, om_turb); + f_T_in = getInterpolatedDistributionForNoSlipWithPressureBC(q, f_B, f_T, feq, om_turb, drho, c2o27); wallMomentumZ -= f_B+f_T_in; } @@ -491,7 +382,8 @@ __global__ void QStressDeviceComp27( { velocityLB = vx1 + vx2; feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o54); - f_SW_in = getInterpolatedDistributionForNoSlipBC(q, f_NE, f_SW, feq, om_turb); + 
// f_SW_in = getInterpolatedDistributionForNoSlipBC(q, f_NE, f_SW, feq, om_turb); + f_SW_in = getInterpolatedDistributionForNoSlipWithPressureBC(q, f_NE, f_SW, feq, om_turb, drho, c1o54); wallMomentumX += f_NE+f_SW_in; wallMomentumY += f_NE+f_SW_in; } @@ -501,7 +393,8 @@ __global__ void QStressDeviceComp27( { velocityLB = -vx1 - vx2; feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o54); - f_NE_in = getInterpolatedDistributionForNoSlipBC(q, f_SW, f_NE, feq, om_turb); + // f_NE_in = getInterpolatedDistributionForNoSlipBC(q, f_SW, f_NE, feq, om_turb); + f_NE_in = getInterpolatedDistributionForNoSlipWithPressureBC(q, f_SW, f_NE, feq, om_turb, drho, c1o54); wallMomentumX -= f_SW+f_NE_in; wallMomentumY -= f_SW+f_NE_in; } @@ -511,7 +404,8 @@ __global__ void QStressDeviceComp27( { velocityLB = vx1 - vx2; feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o54); - f_NW_in = getInterpolatedDistributionForNoSlipBC(q, f_SE, f_NW, feq, om_turb); + // f_NW_in = getInterpolatedDistributionForNoSlipBC(q, f_SE, f_NW, feq, om_turb); + f_NW_in = getInterpolatedDistributionForNoSlipWithPressureBC(q, f_SE, f_NW, feq, om_turb, drho, c1o54); wallMomentumX += f_SE+f_NW_in; wallMomentumY -= f_SE+f_NW_in; } @@ -521,7 +415,8 @@ __global__ void QStressDeviceComp27( { velocityLB = -vx1 + vx2; feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o54); - f_SE_in = getInterpolatedDistributionForNoSlipBC(q, f_NW, f_SE, feq, om_turb); + // f_SE_in = getInterpolatedDistributionForNoSlipBC(q, f_NW, f_SE, feq, om_turb); + f_SE_in = getInterpolatedDistributionForNoSlipWithPressureBC(q, f_NW, f_SE, feq, om_turb, drho, c1o54); wallMomentumX -= f_NW+f_SE_in; wallMomentumY += f_NW+f_SE_in; } @@ -531,7 +426,8 @@ __global__ void QStressDeviceComp27( { velocityLB = vx1 + vx3; feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o54); - f_BW_in = getInterpolatedDistributionForNoSlipBC(q, f_TE, f_BW, feq, om_turb); + // f_BW_in = getInterpolatedDistributionForNoSlipBC(q, f_TE, f_BW, feq, om_turb); + f_BW_in = getInterpolatedDistributionForNoSlipWithPressureBC(q, f_TE, f_BW, feq, om_turb, drho, c1o54); wallMomentumX += f_TE+f_BW_in; wallMomentumZ += f_TE+f_BW_in; } @@ -541,7 +437,8 @@ __global__ void QStressDeviceComp27( { velocityLB = -vx1 - vx3; feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o54); - f_TE_in = getInterpolatedDistributionForNoSlipBC(q, f_BW, f_TE, feq, om_turb); + // f_TE_in = getInterpolatedDistributionForNoSlipBC(q, f_BW, f_TE, feq, om_turb); + f_TE_in = getInterpolatedDistributionForNoSlipWithPressureBC(q, f_BW, f_TE, feq, om_turb, drho, c1o54); wallMomentumX -= f_BW+f_TE_in; wallMomentumZ -= f_BW+f_TE_in; } @@ -551,7 +448,8 @@ __global__ void QStressDeviceComp27( { velocityLB = vx1 - vx3; feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o54); - f_TW_in = getInterpolatedDistributionForNoSlipBC(q, f_BE, f_TW, feq, om_turb); + // f_TW_in = getInterpolatedDistributionForNoSlipBC(q, f_BE, f_TW, feq, om_turb); + f_TW_in = getInterpolatedDistributionForNoSlipWithPressureBC(q, f_BE, f_TW, feq, om_turb, drho, c1o54); wallMomentumX += f_BE+f_TW_in; wallMomentumZ -= f_BE+f_TW_in; } @@ -561,7 +459,8 @@ __global__ void QStressDeviceComp27( { velocityLB = -vx1 + vx3; feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o54); - f_BE_in = getInterpolatedDistributionForNoSlipBC(q, f_TW, f_BE, feq, om_turb); + // f_BE_in = getInterpolatedDistributionForNoSlipBC(q, f_TW, f_BE, feq, om_turb); + f_BE_in = getInterpolatedDistributionForNoSlipWithPressureBC(q, f_TW, f_BE, feq, om_turb, drho, c1o54); wallMomentumX -=
f_TW+f_BE_in; wallMomentumZ += f_TW+f_BE_in; } @@ -571,7 +470,8 @@ __global__ void QStressDeviceComp27( { velocityLB = vx2 + vx3; feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o54); - f_BS_in = getInterpolatedDistributionForNoSlipBC(q, f_TN, f_BS, feq, om_turb); + // f_BS_in = getInterpolatedDistributionForNoSlipBC(q, f_TN, f_BS, feq, om_turb); + f_BS_in = getInterpolatedDistributionForNoSlipWithPressureBC(q, f_TN, f_BS, feq, om_turb, drho, c1o54); wallMomentumY += f_TN+f_BS_in; wallMomentumZ += f_TN+f_BS_in; } @@ -581,7 +481,8 @@ __global__ void QStressDeviceComp27( { velocityLB = -vx2 - vx3; feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o54); - f_TN_in = getInterpolatedDistributionForNoSlipBC(q, f_BS, f_TN, feq, om_turb); + // f_TN_in = getInterpolatedDistributionForNoSlipBC(q, f_BS, f_TN, feq, om_turb); + f_TN_in = getInterpolatedDistributionForNoSlipWithPressureBC(q, f_BS, f_TN, feq, om_turb, drho, c1o54); wallMomentumY -= f_BS+f_TN_in; wallMomentumZ -= f_BS+f_TN_in; } @@ -591,7 +492,8 @@ __global__ void QStressDeviceComp27( { velocityLB = vx2 - vx3; feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o54); - f_TS_in = getInterpolatedDistributionForNoSlipBC(q, f_BN, f_TS, feq, om_turb); + // f_TS_in = getInterpolatedDistributionForNoSlipBC(q, f_BN, f_TS, feq, om_turb); + f_TS_in = getInterpolatedDistributionForNoSlipWithPressureBC(q, f_BN, f_TS, feq, om_turb, drho, c1o54); wallMomentumY += f_BN+f_TS_in; wallMomentumZ -= f_BN+f_TS_in; } @@ -601,7 +503,8 @@ __global__ void QStressDeviceComp27( { velocityLB = -vx2 + vx3; feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o54); - f_BN_in = getInterpolatedDistributionForNoSlipBC(q, f_TS, f_BN, feq, om_turb); + // f_BN_in = getInterpolatedDistributionForNoSlipBC(q, f_TS, f_BN, feq, om_turb); + f_BN_in = getInterpolatedDistributionForNoSlipWithPressureBC(q, f_TS, f_BN, feq, om_turb, drho, c1o54); wallMomentumY -= f_TS+f_BN_in; wallMomentumZ += f_TS+f_BN_in; } @@ -611,7 +514,8 @@ __global__ void QStressDeviceComp27( { velocityLB = vx1 + vx2 + vx3; feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o216); - f_BSW_in = getInterpolatedDistributionForNoSlipBC(q, f_TNE, f_BSW, feq, om_turb); + // f_BSW_in = getInterpolatedDistributionForNoSlipBC(q, f_TNE, f_BSW, feq, om_turb); + f_BSW_in = getInterpolatedDistributionForNoSlipWithPressureBC(q, f_TNE, f_BSW, feq, om_turb, drho, c1o216); wallMomentumX += f_TNE+f_BSW_in; wallMomentumY += f_TNE+f_BSW_in; wallMomentumZ += f_TNE+f_BSW_in; @@ -622,7 +526,8 @@ __global__ void QStressDeviceComp27( { velocityLB = -vx1 - vx2 - vx3; feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o216); - f_TNE_in = getInterpolatedDistributionForNoSlipBC(q, f_BSW, f_TNE, feq, om_turb); + // f_TNE_in = getInterpolatedDistributionForNoSlipBC(q, f_BSW, f_TNE, feq, om_turb); + f_TNE_in = getInterpolatedDistributionForNoSlipWithPressureBC(q, f_BSW, f_TNE, feq, om_turb, drho, c1o216); wallMomentumX -= f_BSW+f_TNE_in; wallMomentumY -= f_BSW+f_TNE_in; wallMomentumZ -= f_BSW+f_TNE_in; @@ -633,7 +538,8 @@ __global__ void QStressDeviceComp27( { velocityLB = vx1 + vx2 - vx3; feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o216); - f_TSW_in = getInterpolatedDistributionForNoSlipBC(q, f_BNE, f_TSW, feq, om_turb); + // f_TSW_in = getInterpolatedDistributionForNoSlipBC(q, f_BNE, f_TSW, feq, om_turb); + f_TSW_in = getInterpolatedDistributionForNoSlipWithPressureBC(q, f_BNE, f_TSW, feq, om_turb, drho, c1o216); wallMomentumX += f_BNE+f_TSW_in; wallMomentumY += f_BNE+f_TSW_in; wallMomentumZ -= f_BNE+f_TSW_in; @@ -644,7 
+550,8 @@ __global__ void QStressDeviceComp27( { velocityLB = -vx1 - vx2 + vx3; feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o216); - f_BNE_in = getInterpolatedDistributionForNoSlipBC(q, f_TSW, f_BNE, feq, om_turb); + // f_BNE_in = getInterpolatedDistributionForNoSlipBC(q, f_TSW, f_BNE, feq, om_turb); + f_BNE_in = getInterpolatedDistributionForNoSlipWithPressureBC(q, f_TSW, f_BNE, feq, om_turb, drho, c1o216); wallMomentumX -= f_TSW+f_BNE_in; wallMomentumY -= f_TSW+f_BNE_in; wallMomentumZ += f_TSW+f_BNE_in; @@ -655,7 +562,8 @@ __global__ void QStressDeviceComp27( { velocityLB = vx1 - vx2 + vx3; feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o216); - f_BNW_in = getInterpolatedDistributionForNoSlipBC(q, f_TSE, f_BNW, feq, om_turb); + // f_BNW_in = getInterpolatedDistributionForNoSlipBC(q, f_TSE, f_BNW, feq, om_turb); + f_BNW_in = getInterpolatedDistributionForNoSlipWithPressureBC(q, f_TSE, f_BNW, feq, om_turb, drho, c1o216); wallMomentumX += f_TSE+f_BNW_in; wallMomentumY -= f_TSE+f_BNW_in; wallMomentumZ += f_TSE+f_BNW_in; @@ -666,7 +574,8 @@ __global__ void QStressDeviceComp27( { velocityLB = -vx1 + vx2 - vx3; feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o216); - f_TSE_in = getInterpolatedDistributionForNoSlipBC(q, f_BNW, f_TSE, feq, om_turb); + // f_TSE_in = getInterpolatedDistributionForNoSlipBC(q, f_BNW, f_TSE, feq, om_turb); + f_TSE_in = getInterpolatedDistributionForNoSlipWithPressureBC(q, f_BNW, f_TSE, feq, om_turb, drho, c1o216); wallMomentumX -= f_BNW+f_TSE_in; wallMomentumY += f_BNW+f_TSE_in; wallMomentumZ -= f_BNW+f_TSE_in; @@ -677,7 +586,8 @@ __global__ void QStressDeviceComp27( { velocityLB = vx1 - vx2 - vx3; feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o216); - f_TNW_in = getInterpolatedDistributionForNoSlipBC(q, f_BSE, f_TNW, feq, om_turb); + // f_TNW_in = getInterpolatedDistributionForNoSlipBC(q, f_BSE, f_TNW, feq, om_turb); + f_TNW_in = getInterpolatedDistributionForNoSlipWithPressureBC(q, f_BSE, f_TNW, feq, om_turb, drho, c1o216); wallMomentumX += f_BSE+f_TNW_in; wallMomentumY -= f_BSE+f_TNW_in; wallMomentumZ -= f_BSE+f_TNW_in; @@ -688,7 +598,8 @@ __global__ void QStressDeviceComp27( { velocityLB = -vx1 + vx2 + vx3; feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o216); - f_BSE_in = getInterpolatedDistributionForNoSlipBC(q, f_TNW, f_BSE, feq, om_turb); + // f_BSE_in = getInterpolatedDistributionForNoSlipBC(q, f_TNW, f_BSE, feq, om_turb); + f_BSE_in = getInterpolatedDistributionForNoSlipWithPressureBC(q, f_TNW, f_BSE, feq, om_turb, drho, c1o216); wallMomentumX -= f_TNW+f_BSE_in; wallMomentumY += f_TNW+f_BSE_in; wallMomentumZ += f_TNW+f_BSE_in; @@ -699,7 +610,7 @@ __global__ void QStressDeviceComp27( // //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// real VeloX=0.0, VeloY=0.0, VeloZ=0.0; - q = 0.5f; + q = q_dirB[k]; real eps = 0.001f; iMEM( k, k_N[k], @@ -974,67 +885,9 @@ __global__ void BBStressDevice27( real* DD, unsigned long long numberOfLBnodes, bool isEvenTimestep) { - Distributions27 D; - if (isEvenTimestep==true) - { - D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes]; - D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes]; - D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes]; - D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes]; - D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes]; - D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes]; - D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes]; - D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes]; - D.f[DIR_PM0] = &DD[DIR_PM0 * 
numberOfLBnodes]; - D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes]; - D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes]; - D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes]; - D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes]; - D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes]; - D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes]; - D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes]; - D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes]; - D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes]; - D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes]; - D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes]; - D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes]; - D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes]; - D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes]; - D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes]; - D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes]; - D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes]; - D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes]; - } - else - { - D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes]; - D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes]; - D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes]; - D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes]; - D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes]; - D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes]; - D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes]; - D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes]; - D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes]; - D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes]; - D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes]; - D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes]; - D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes]; - D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes]; - D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes]; - D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes]; - D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes]; - D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes]; - D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes]; - D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes]; - D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes]; - D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes]; - D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes]; - D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes]; - D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes]; - D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes]; - D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes]; - } + + Distributions27 D = vf::gpu::getDistributionReferences27(DD, numberOfLBnodes, isEvenTimestep); + //////////////////////////////////////////////////////////////////////////////// const unsigned x = threadIdx.x; // Globaler x-Index const unsigned y = blockIdx.x; // Globaler y-Index @@ -1162,66 +1015,8 @@ __global__ void BBStressDevice27( real* DD, (f_T - f_B)) / (c1o1 + drho); ////////////////////////////////////////////////////////////////////////// - if (isEvenTimestep==false) - { - D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes]; - D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes]; - D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes]; - D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes]; - D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes]; - D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes]; - D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes]; - D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes]; - D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes]; - D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes]; - D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes]; - D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes]; - D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes]; - D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes]; - D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes]; - D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes]; - D.f[DIR_0PM] = &DD[DIR_0PM * 
numberOfLBnodes]; - D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes]; - D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes]; - D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes]; - D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes]; - D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes]; - D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes]; - D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes]; - D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes]; - D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes]; - D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes]; - } - else - { - D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes]; - D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes]; - D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes]; - D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes]; - D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes]; - D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes]; - D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes]; - D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes]; - D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes]; - D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes]; - D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes]; - D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes]; - D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes]; - D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes]; - D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes]; - D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes]; - D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes]; - D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes]; - D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes]; - D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes]; - D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes]; - D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes]; - D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes]; - D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes]; - D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes]; - D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes]; - D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes]; - } + + D = vf::gpu::getDistributionReferences27(DD, numberOfLBnodes, !isEvenTimestep); //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// real f_E_in, f_W_in, f_N_in, f_S_in, f_T_in, f_B_in, f_NE_in, f_SW_in, f_SE_in, f_NW_in, f_TE_in, f_BW_in, f_BE_in, f_TW_in, f_TN_in, f_BS_in, f_BN_in, f_TS_in, f_TNE_in, f_TSW_in, f_TSE_in, f_TNW_in, f_BNE_in, f_BSW_in, f_BSE_in, f_BNW_in; @@ -1445,7 +1240,7 @@ __global__ void BBStressDevice27( real* DD, // //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// real VeloX=0.0, VeloY=0.0, VeloZ=0.0; - q = 0.5f; + q = q_dirB[k]; real eps = 0.001f; iMEM( k, k_N[k], @@ -1721,67 +1516,8 @@ __global__ void BBStressPressureDevice27( real* DD, unsigned long long numberOfLBnodes, bool isEvenTimestep) { - Distributions27 D; - if (isEvenTimestep==true) - { - D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes]; - D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes]; - D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes]; - D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes]; - D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes]; - D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes]; - D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes]; - D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes]; - D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes]; - D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes]; - D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes]; - D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes]; - D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes]; - D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes]; - D.f[DIR_0PP] = &DD[DIR_0PP 
* numberOfLBnodes]; - D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes]; - D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes]; - D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes]; - D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes]; - D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes]; - D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes]; - D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes]; - D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes]; - D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes]; - D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes]; - D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes]; - D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes]; - } - else - { - D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes]; - D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes]; - D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes]; - D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes]; - D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes]; - D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes]; - D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes]; - D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes]; - D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes]; - D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes]; - D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes]; - D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes]; - D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes]; - D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes]; - D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes]; - D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes]; - D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes]; - D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes]; - D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes]; - D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes]; - D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes]; - D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes]; - D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes]; - D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes]; - D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes]; - D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes]; - D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes]; - } + Distributions27 D = vf::gpu::getDistributionReferences27(DD, numberOfLBnodes, isEvenTimestep); + //////////////////////////////////////////////////////////////////////////////// const unsigned x = threadIdx.x; // Globaler x-Index const unsigned y = blockIdx.x; // Globaler y-Index @@ -1909,66 +1645,8 @@ __global__ void BBStressPressureDevice27( real* DD, (f_T - f_B)) / (c1o1 + drho); ////////////////////////////////////////////////////////////////////////// - if (isEvenTimestep==false) - { - D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes]; - D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes]; - D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes]; - D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes]; - D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes]; - D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes]; - D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes]; - D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes]; - D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes]; - D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes]; - D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes]; - D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes]; - D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes]; - D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes]; - D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes]; - D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes]; - D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes]; - D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes]; - D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes]; - D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes]; - D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes]; - D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes]; - D.f[DIR_MPP] = &DD[DIR_MPP * 
numberOfLBnodes]; - D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes]; - D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes]; - D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes]; - D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes]; - } - else - { - D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes]; - D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes]; - D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes]; - D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes]; - D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes]; - D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes]; - D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes]; - D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes]; - D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes]; - D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes]; - D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes]; - D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes]; - D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes]; - D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes]; - D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes]; - D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes]; - D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes]; - D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes]; - D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes]; - D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes]; - D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes]; - D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes]; - D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes]; - D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes]; - D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes]; - D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes]; - D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes]; - } + D = vf::gpu::getDistributionReferences27(DD, numberOfLBnodes, !isEvenTimestep); + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// real f_E_in, f_W_in, f_N_in, f_S_in, f_T_in, f_B_in, f_NE_in, f_SW_in, f_SE_in, f_NW_in, f_TE_in, f_BW_in, f_BE_in, f_TW_in, f_TN_in, f_BS_in, f_BN_in, f_TS_in, f_TNE_in, f_TSW_in, f_TSE_in, f_TNW_in, f_BNE_in, f_BSW_in, f_BSE_in, f_BNW_in; @@ -2192,7 +1870,7 @@ __global__ void BBStressPressureDevice27( real* DD, // //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// real VeloX=0.0, VeloY=0.0, VeloZ=0.0; - q = 0.5f; + q = q_dirB[k]; real eps = 0.001f; iMEM( k, k_N[k], diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17/CumulantK17_Device.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17/CumulantK17_Device.cu index 1ffec96c255b7923f3ee39c01f756abd8cad8862..2044c6ad8d7f242c96479cd060c70b91c1dfb216 100644 --- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17/CumulantK17_Device.cu +++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17/CumulantK17_Device.cu @@ -607,7 +607,7 @@ __global__ void LB_Kernel_CumulantK17( m_001 = -m_001; //Write to array here to distribute read/write - if(writeMacroscopicVariables) + if(writeMacroscopicVariables || turbulenceModel==TurbulenceModel::AMD) { rho[k_000] = drho; vx[k_000] = vvx; @@ -664,33 +664,33 @@ __global__ void LB_Kernel_CumulantK17( //! <a href="https://doi.org/10.3390/computation5020019"><b>[ M. Geier et al. (2017), //! DOI:10.3390/computation5020019 ]</b></a> //! 
- (dist.f[DIR_P00])[k_000] = f_M00; - (dist.f[DIR_M00])[k_M00] = f_P00; - (dist.f[DIR_0P0])[k_000] = f_0M0; - (dist.f[DIR_0M0])[k_0M0] = f_0P0; - (dist.f[DIR_00P])[k_000] = f_00M; - (dist.f[DIR_00M])[k_00M] = f_00P; - (dist.f[DIR_PP0])[k_000] = f_MM0; - (dist.f[DIR_MM0])[k_MM0] = f_PP0; - (dist.f[DIR_PM0])[k_0M0] = f_MP0; - (dist.f[DIR_MP0])[k_M00] = f_PM0; - (dist.f[DIR_P0P])[k_000] = f_M0M; - (dist.f[DIR_M0M])[k_M0M] = f_P0P; - (dist.f[DIR_P0M])[k_00M] = f_M0P; - (dist.f[DIR_M0P])[k_M00] = f_P0M; - (dist.f[DIR_0PP])[k_000] = f_0MM; - (dist.f[DIR_0MM])[k_0MM] = f_0PP; - (dist.f[DIR_0PM])[k_00M] = f_0MP; - (dist.f[DIR_0MP])[k_0M0] = f_0PM; + (dist.f[DIR_P00])[k_000] = f_M00; + (dist.f[DIR_M00])[k_M00] = f_P00; + (dist.f[DIR_0P0])[k_000] = f_0M0; + (dist.f[DIR_0M0])[k_0M0] = f_0P0; + (dist.f[DIR_00P])[k_000] = f_00M; + (dist.f[DIR_00M])[k_00M] = f_00P; + (dist.f[DIR_PP0])[k_000] = f_MM0; + (dist.f[DIR_MM0])[k_MM0] = f_PP0; + (dist.f[DIR_PM0])[k_0M0] = f_MP0; + (dist.f[DIR_MP0])[k_M00] = f_PM0; + (dist.f[DIR_P0P])[k_000] = f_M0M; + (dist.f[DIR_M0M])[k_M0M] = f_P0P; + (dist.f[DIR_P0M])[k_00M] = f_M0P; + (dist.f[DIR_M0P])[k_M00] = f_P0M; + (dist.f[DIR_0PP])[k_000] = f_0MM; + (dist.f[DIR_0MM])[k_0MM] = f_0PP; + (dist.f[DIR_0PM])[k_00M] = f_0MP; + (dist.f[DIR_0MP])[k_0M0] = f_0PM; (dist.f[DIR_000])[k_000] = f_000; - (dist.f[DIR_PPP])[k_000] = f_MMM; - (dist.f[DIR_PMP])[k_0M0] = f_MPM; - (dist.f[DIR_PPM])[k_00M] = f_MMP; - (dist.f[DIR_PMM])[k_0MM] = f_MPP; - (dist.f[DIR_MPP])[k_M00] = f_PMM; - (dist.f[DIR_MMP])[k_MM0] = f_PPM; - (dist.f[DIR_MPM])[k_M0M] = f_PMP; - (dist.f[DIR_MMM])[k_MMM] = f_PPP; + (dist.f[DIR_PPP])[k_000] = f_MMM; + (dist.f[DIR_PMP])[k_0M0] = f_MPM; + (dist.f[DIR_PPM])[k_00M] = f_MMP; + (dist.f[DIR_PMM])[k_0MM] = f_MPP; + (dist.f[DIR_MPP])[k_M00] = f_PMM; + (dist.f[DIR_MMP])[k_MM0] = f_PPM; + (dist.f[DIR_MPM])[k_M0M] = f_PMP; + (dist.f[DIR_MMM])[k_MMM] = f_PPP; } template __global__ void LB_Kernel_CumulantK17 < TurbulenceModel::AMD, true, true > ( real omega_in, uint* neighborX, uint* neighborY, uint* neighborZ, real* distributions, real* rho, real* vx, real* vy, real* vz, real* turbulentViscosity, real SGSconstant, unsigned long long numberOfLBnodes, int level, real* forces, real* bodyForceX, real* bodyForceY, real* bodyForceZ, real* quadricLimiters, bool isEvenTimestep, const uint *fluidNodeIndices, uint numberOfFluidNodes); diff --git a/src/gpu/VirtualFluids_GPU/KernelManager/GridScalingKernelManager.cpp b/src/gpu/VirtualFluids_GPU/KernelManager/GridScalingKernelManager.cpp index 2b6a266c0d4e5f523091fa4982eee5d83b2ec675..0841d6931bba32440b47d02c9f83864a80f724be 100644 --- a/src/gpu/VirtualFluids_GPU/KernelManager/GridScalingKernelManager.cpp +++ b/src/gpu/VirtualFluids_GPU/KernelManager/GridScalingKernelManager.cpp @@ -47,10 +47,10 @@ GridScalingKernelManager::GridScalingKernelManager(SPtr<Parameter> parameter, Gr if(!gridScalingFactory){ throw std::runtime_error("There is more than one level, but no scalingFactory was provided."); } - checkScalingFunction(gridScalingFactory->getGridScalingFC(), this->para->getParD(0)->intFC, "scalingFineToCoarse"); - checkScalingFunction(gridScalingFactory->getGridScalingCF(), this->para->getParD(0)->intCF, "scalingCoarseToFine"); - this->scalingFineToCoarse = gridScalingFactory->getGridScalingFC(); - this->scalingCoarseToFine = gridScalingFactory->getGridScalingCF(); + checkScalingFunction(gridScalingFactory->getGridScalingFC(parameter->getUseTurbulentViscosity()), this->para->getParD(0)->intFC, "scalingFineToCoarse"); + 
checkScalingFunction(gridScalingFactory->getGridScalingCF(parameter->getUseTurbulentViscosity()), this->para->getParD(0)->intCF, "scalingCoarseToFine"); + this->scalingFineToCoarse = gridScalingFactory->getGridScalingFC(parameter->getUseTurbulentViscosity()); + this->scalingCoarseToFine = gridScalingFactory->getGridScalingCF(parameter->getUseTurbulentViscosity()); } if(this->scalingFineToCoarse == nullptr) diff --git a/src/gpu/VirtualFluids_GPU/LBM/GPUHelperFunctions/KernelUtilities.h b/src/gpu/VirtualFluids_GPU/LBM/GPUHelperFunctions/KernelUtilities.h index 37208ee59586533fa7f8ffbc269246826ed27fb8..e910f8ac5a71053d927e2531dcb225199d708749 100644 --- a/src/gpu/VirtualFluids_GPU/LBM/GPUHelperFunctions/KernelUtilities.h +++ b/src/gpu/VirtualFluids_GPU/LBM/GPUHelperFunctions/KernelUtilities.h @@ -166,6 +166,14 @@ __inline__ __device__ real getInterpolatedDistributionForNoSlipBC(const real& q, + (q * (f + fInverse)) / (c1o1 + q); } +__inline__ __device__ real getInterpolatedDistributionForNoSlipWithPressureBC(const real& q, const real& f, const real& fInverse, const real& feq, + const real& omega, const real& drho, const real weight) +{ + + return (c1o1-q) / (c1o1+q) * (f - fInverse + (f + fInverse - c2o1 * feq * omega) / (c1o1 - omega)) * c1o2 + + (q * (f + fInverse)) / (c1o1 + q) - weight * drho; +} + __inline__ __device__ real getInterpolatedDistributionForVeloWithPressureBC(const real& q, const real& f, const real& fInverse, const real& feq, const real& omega, const real& drho, const real& velocity, const real weight)