diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 844a85d255966a79b9b4744ce17b89b63a1e6155..84feed6dc40bce2b59ad483fb17973703d9ce387 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -75,7 +75,7 @@ clang_10: - export CXX=clang++ ############################################################################### -msvc_16: +msvc_17: stage: build tags: @@ -92,7 +92,7 @@ msvc_16: - git --version - $env:Path += ";C:\Program Files\CMake\bin\" - cmake --version - - $env:Path += ";C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\MSBuild\Current\Bin" + - $env:Path += ";C:\Program Files\Microsoft Visual Studio\2022\Enterprise\MSBuild\Current\Bin" - MSBuild.exe -version script: @@ -180,14 +180,14 @@ gcc_9_unit_tests: - ctest ############################################################################### -msvc_16_unit_tests: +msvc_17_unit_tests: stage: test tags: - win - gpu - needs: ["msvc_16"] + needs: ["msvc_17"] before_script: - $env:Path += ";C:\Program Files\CMake\bin\" diff --git a/CMakePresets.json b/CMakePresets.json index 5bb07d3cc99d377a877fc30915a88c397815f73c..6e2658d148bddf55950e5849adcf10709a8b8caf 100644 --- a/CMakePresets.json +++ b/CMakePresets.json @@ -14,7 +14,7 @@ { "name": "msvc", "hidden": true, - "generator": "Visual Studio 16 2019", + "generator": "Visual Studio 17 2022", "architecture": "x64", "condition": { "type": "equals", diff --git a/src/gpu/VirtualFluids_GPU/GPU/CalcMac27.cu b/src/gpu/VirtualFluids_GPU/GPU/CalcMac27.cu index c1b178a4a1ea94b61afdcbe78476684d032f43c0..f7bb09f816f45973fd4e2319a1bfa35cf9172caa 100644 --- a/src/gpu/VirtualFluids_GPU/GPU/CalcMac27.cu +++ b/src/gpu/VirtualFluids_GPU/GPU/CalcMac27.cu @@ -1,306 +1,310 @@ -// _ ___ __ __________ _ __ ______________ __ -// | | / (_)____/ /___ ______ _/ / ____/ /_ __(_)___/ /____ / ___/ __ / / / / -// | | / / / ___/ __/ / / / __ `/ / /_ / / / / / / __ / ___/ / /___/ /_/ / / / / -// | |/ / / / / /_/ /_/ / /_/ / / __/ / / /_/ / / /_/ (__ ) / /_) / ____/ /__/ / -// |___/_/_/ 
\__/\__,_/\__,_/_/_/ /_/\__,_/_/\__,_/____/ \____/_/ \_____/ +//======================================================================================= +// ____ ____ __ ______ __________ __ __ __ __ +// \ \ | | | | | _ \ |___ ___| | | | | / \ | | +// \ \ | | | | | |_) | | | | | | | / \ | | +// \ \ | | | | | _ / | | | | | | / /\ \ | | +// \ \ | | | | | | \ \ | | | \__/ | / ____ \ | |____ +// \ \ | | |__| |__| \__\ |__| \________/ /__/ \__\ |_______| +// \ \ | | ________________________________________________________________ +// \ \ | | | ______________________________________________________________| +// \ \| | | | __ __ __ __ ______ _______ +// \ | | |_____ | | | | | | | | | _ \ / _____) +// \ | | _____| | | | | | | | | | | \ \ \_______ +// \ | | | | |_____ | \_/ | | | | |_/ / _____ | +// \ _____| |__| |________| \_______/ |__| |______/ (_______/ // -////////////////////////////////////////////////////////////////////////// -/* Device code */ +// This file is part of VirtualFluids. VirtualFluids is free software: you can +// redistribute it and/or modify it under the terms of the GNU General Public +// License as published by the Free Software Foundation, either version 3 of +// the License, or (at your option) any later version. +// +// VirtualFluids is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// for more details. +// +// You should have received a copy of the GNU General Public License along +// with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>. +// +//! \file CalcMac27.cu +//! \ingroup GPU +//! 
\author Martin Schoenherr, Soeren Peters +//====================================================================================== #include "LBM/LB.h" #include "lbm/constants/D3Q27.h" #include "lbm/constants/NumericConstants.h" +#include "lbm/MacroscopicQuantities.h" + +#include "Kernel/Utilities/DistributionHelper.cuh" using namespace vf::lbm::constant; using namespace vf::lbm::dir; +using namespace vf::gpu; + +//////////////////////////////////////////////////////////////////////////////// +__global__ void LBCalcMac27( + real* vxD, + real* vyD, + real* vzD, + real* rhoD, + unsigned int* geoD, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + unsigned long long numberOfLBnodes, + real* distributions, + bool isEvenTimestep) +{ + const unsigned int tx = threadIdx.x; // Thread index = local i index + const unsigned int by = blockIdx.x; // Block index x + const unsigned int bz = blockIdx.y; // Block index y + const unsigned int x = tx + STARTOFFX; // Global x index + const unsigned int y = by + STARTOFFY; // Global y index + const unsigned int z = bz + STARTOFFZ; // Global z index + + const unsigned nx = blockDim.x + 2 * STARTOFFX; + const unsigned ny = gridDim.x + 2 * STARTOFFY; + + const unsigned int k = nx*(ny*z + y) + x; // Index used to access the device arrays + + + if(k >= numberOfLBnodes) + return; + + if(!isValidFluidNode(geoD[k])) + return; + + rhoD[k] = c0o1; + vxD[k] = c0o1; + vyD[k] = c0o1; + vzD[k] = c0o1; + + DistributionWrapper distr_wrapper(distributions, numberOfLBnodes, isEvenTimestep, k, neighborX, neighborY, neighborZ); + const auto& distribution = distr_wrapper.distribution; + + rhoD[k] = vf::lbm::getDensity(distribution.f); + vxD[k] = vf::lbm::getIncompressibleVelocityX1(distribution.f); + vyD[k] = vf::lbm::getIncompressibleVelocityX2(distribution.f); + vzD[k] = vf::lbm::getIncompressibleVelocityX3(distribution.f); +} + -#include "lbm/MacroscopicQuantities.h" -#include "../Kernel/Utilities/DistributionHelper.cuh" 
//////////////////////////////////////////////////////////////////////////////// -__global__ void LBCalcMac27( real* vxD, - real* vyD, - real* vzD, - real* rhoD, - unsigned int* geoD, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - unsigned long long numberOfLBnodes, - real* distributions, - bool isEvenTimestep) +__global__ void LBCalcMacSP27( + real* vxD, + real* vyD, + real* vzD, + real* rhoD, + real* pressD, + unsigned int* geoD, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + unsigned long long numberOfLBnodes, + real* distributions, + bool isEvenTimestep) { - const unsigned int tx = threadIdx.x; // Thread index = lokaler i index - const unsigned int by = blockIdx.x; // Block index x - const unsigned int bz = blockIdx.y; // Block index y - const unsigned int x = tx + STARTOFFX; // Globaler x-Index - const unsigned int y = by + STARTOFFY; // Globaler y-Index - const unsigned int z = bz + STARTOFFZ; // Globaler z-Index + //////////////////////////////////////////////////////////////////////////////// + //! - Get node index coordinates from threadIdx, blockIdx, blockDim and gridDim. + //! + const unsigned nodeIndex = getNodeIndex(); + + ////////////////////////////////////////////////////////////////////////// + if(nodeIndex<numberOfLBnodes) + { + ////////////////////////////////////////////////////////////////////////// + //! - Read distributions: style of reading and writing the distributions from/to stored arrays dependent on + //! timestep is based on the esoteric twist algorithm \ref <a + //! href="https://doi.org/10.3390/computation5020019"><b>[ M. Geier et al. (2017), + //! DOI:10.3390/computation5020019 ]</b></a> + //! 
+ Distributions27 dist; + getPointersToDistributions(dist, distributions, numberOfLBnodes, isEvenTimestep); + + ////////////////////////////////////////////////////////////////////////// + //index + unsigned int kzero= nodeIndex; + unsigned int ke = nodeIndex; + unsigned int kw = neighborX[nodeIndex]; + unsigned int kn = nodeIndex; + unsigned int ks = neighborY[nodeIndex]; + unsigned int kt = nodeIndex; + unsigned int kb = neighborZ[nodeIndex]; + unsigned int ksw = neighborY[kw]; + unsigned int kne = nodeIndex; + unsigned int kse = ks; + unsigned int knw = kw; + unsigned int kbw = neighborZ[kw]; + unsigned int kte = nodeIndex; + unsigned int kbe = kb; + unsigned int ktw = kw; + unsigned int kbs = neighborZ[ks]; + unsigned int ktn = nodeIndex; + unsigned int kbn = kb; + unsigned int kts = ks; + unsigned int ktse = ks; + unsigned int kbnw = kbw; + unsigned int ktnw = kw; + unsigned int kbse = kbs; + unsigned int ktsw = ksw; + unsigned int kbne = kb; + unsigned int ktne = nodeIndex; + unsigned int kbsw = neighborZ[ksw]; + ////////////////////////////////////////////////////////////////////////// + pressD[nodeIndex] = c0o1; + rhoD[nodeIndex] = c0o1; + vxD[nodeIndex] = c0o1; + vyD[nodeIndex] = c0o1; + vzD[nodeIndex] = c0o1; + + if(geoD[nodeIndex] == GEO_FLUID) + { + rhoD[nodeIndex] = + (dist.f[DIR_P00])[ke ]+ (dist.f[DIR_M00])[kw ]+ + (dist.f[DIR_0P0])[kn ]+ (dist.f[DIR_0M0])[ks ]+ + (dist.f[DIR_00P])[kt ]+ (dist.f[DIR_00M])[kb ]+ + (dist.f[DIR_PP0])[kne ]+ (dist.f[DIR_MM0])[ksw ]+ + (dist.f[DIR_PM0])[kse ]+ (dist.f[DIR_MP0])[knw ]+ + (dist.f[DIR_P0P])[kte ]+ (dist.f[DIR_M0M])[kbw ]+ + (dist.f[DIR_P0M])[kbe ]+ (dist.f[DIR_M0P])[ktw ]+ + (dist.f[DIR_0PP])[ktn ]+ (dist.f[DIR_0MM])[kbs ]+ + (dist.f[DIR_0PM])[kbn ]+ (dist.f[DIR_0MP])[kts ]+ + (dist.f[DIR_000])[kzero]+ + (dist.f[DIR_PPP])[ktne]+ (dist.f[DIR_MMP])[ktsw]+ + (dist.f[DIR_PMP])[ktse]+ (dist.f[DIR_MPP])[ktnw]+ + (dist.f[DIR_PPM])[kbne]+ (dist.f[DIR_MMM])[kbsw]+ + (dist.f[DIR_PMM])[kbse]+ (dist.f[DIR_MPM])[kbnw]; + 
+ vxD[nodeIndex] = + (dist.f[DIR_P00])[ke ]- (dist.f[DIR_M00])[kw ]+ + (dist.f[DIR_PP0])[kne ]- (dist.f[DIR_MM0])[ksw ]+ + (dist.f[DIR_PM0])[kse ]- (dist.f[DIR_MP0])[knw ]+ + (dist.f[DIR_P0P])[kte ]- (dist.f[DIR_M0M])[kbw ]+ + (dist.f[DIR_P0M])[kbe ]- (dist.f[DIR_M0P])[ktw ]+ + (dist.f[DIR_PPP])[ktne]- (dist.f[DIR_MMP])[ktsw]+ + (dist.f[DIR_PMP])[ktse]- (dist.f[DIR_MPP])[ktnw]+ + (dist.f[DIR_PPM])[kbne]- (dist.f[DIR_MMM])[kbsw]+ + (dist.f[DIR_PMM])[kbse]- (dist.f[DIR_MPM])[kbnw]; + + vyD[nodeIndex] = + (dist.f[DIR_0P0])[kn ]- (dist.f[DIR_0M0])[ks ]+ + (dist.f[DIR_PP0])[kne ]- (dist.f[DIR_MM0])[ksw ]- + (dist.f[DIR_PM0])[kse ]+ (dist.f[DIR_MP0])[knw ]+ + (dist.f[DIR_0PP])[ktn ]- (dist.f[DIR_0MM])[kbs ]+ + (dist.f[DIR_0PM])[kbn ]- (dist.f[DIR_0MP])[kts ]+ + (dist.f[DIR_PPP])[ktne]- (dist.f[DIR_MMP])[ktsw]- + (dist.f[DIR_PMP])[ktse]+ (dist.f[DIR_MPP])[ktnw]+ + (dist.f[DIR_PPM])[kbne]- (dist.f[DIR_MMM])[kbsw]- + (dist.f[DIR_PMM])[kbse]+ (dist.f[DIR_MPM])[kbnw]; + + vzD[nodeIndex] = + (dist.f[DIR_00P])[kt ]- (dist.f[DIR_00M])[kb ]+ + (dist.f[DIR_P0P])[kte ]- (dist.f[DIR_M0M])[kbw ]- + (dist.f[DIR_P0M])[kbe ]+ (dist.f[DIR_M0P])[ktw ]+ + (dist.f[DIR_0PP])[ktn ]- (dist.f[DIR_0MM])[kbs ]- + (dist.f[DIR_0PM])[kbn ]+ (dist.f[DIR_0MP])[kts ]+ + (dist.f[DIR_PPP])[ktne]+ (dist.f[DIR_MMP])[ktsw]+ + (dist.f[DIR_PMP])[ktse]+ (dist.f[DIR_MPP])[ktnw]- + (dist.f[DIR_PPM])[kbne]- (dist.f[DIR_MMM])[kbsw]- + (dist.f[DIR_PMM])[kbse]- (dist.f[DIR_MPM])[kbnw]; + + pressD[nodeIndex] = + ((dist.f[DIR_P00])[ke ]+ (dist.f[DIR_M00])[kw ]+ + (dist.f[DIR_0P0])[kn ]+ (dist.f[DIR_0M0])[ks ]+ + (dist.f[DIR_00P])[kt ]+ (dist.f[DIR_00M])[kb ]+ + 2.f*( + (dist.f[DIR_PP0])[kne ]+ (dist.f[DIR_MM0])[ksw ]+ + (dist.f[DIR_PM0])[kse ]+ (dist.f[DIR_MP0])[knw ]+ + (dist.f[DIR_P0P])[kte ]+ (dist.f[DIR_M0M])[kbw ]+ + (dist.f[DIR_P0M])[kbe ]+ (dist.f[DIR_M0P])[ktw ]+ + (dist.f[DIR_0PP])[ktn ]+ (dist.f[DIR_0MM])[kbs ]+ + (dist.f[DIR_0PM])[kbn ]+ (dist.f[DIR_0MP])[kts ])+ + 3.f*( + (dist.f[DIR_PPP])[ktne]+ 
(dist.f[DIR_MMP])[ktsw]+ + (dist.f[DIR_PMP])[ktse]+ (dist.f[DIR_MPP])[ktnw]+ + (dist.f[DIR_PPM])[kbne]+ (dist.f[DIR_MMM])[kbsw]+ + (dist.f[DIR_PMM])[kbse]+ (dist.f[DIR_MPM])[kbnw])- + rhoD[nodeIndex]-(vxD[nodeIndex] * vxD[nodeIndex] + vyD[nodeIndex] * vyD[nodeIndex] + vzD[nodeIndex] * vzD[nodeIndex]) * (c1o1+c0o1*rhoD[nodeIndex])) * c1o2+rhoD[nodeIndex]; // times zero for incompressible case + //achtung op hart gesetzt Annahme op = 1 ; ^^^^(1.0/op-0.5)=0.5 + } + } +} +//////////////////////////////////////////////////////////////////////////////// + + + + + + + + + + + + + - const unsigned nx = blockDim.x + 2 * STARTOFFX; - const unsigned ny = gridDim.x + 2 * STARTOFFY; - const unsigned int k = nx*(ny*z + y) + x; // Zugriff auf arrays im device - if(k >= numberOfLBnodes) - return; - if(!vf::gpu::isValidFluidNode(geoD[k])) - return; - rhoD[k] = c0o1; - vxD[k] = c0o1; - vyD[k] = c0o1; - vzD[k] = c0o1; - vf::gpu::DistributionWrapper distr_wrapper(distributions, numberOfLBnodes, isEvenTimestep, k, neighborX, neighborY, neighborZ); - const auto& distribution = distr_wrapper.distribution; - rhoD[k] = vf::lbm::getDensity(distribution.f); - vxD[k] = vf::lbm::getIncompressibleVelocityX1(distribution.f); - vyD[k] = vf::lbm::getIncompressibleVelocityX2(distribution.f); - vzD[k] = vf::lbm::getIncompressibleVelocityX3(distribution.f); -} -//////////////////////////////////////////////////////////////////////////////// -__global__ void LBCalcMacSP27( real* vxD, - real* vyD, - real* vzD, - real* rhoD, - real* pressD, - unsigned int* geoD, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - unsigned long long numberOfLBnodes, - real* DD, - bool isEvenTimestep) -{ - Distributions27 D; - if (isEvenTimestep==true) - { - D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes]; - D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes]; - D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes]; - D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes]; - D.f[DIR_00P] = &DD[DIR_00P * 
numberOfLBnodes]; - D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes]; - D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes]; - D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes]; - D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes]; - D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes]; - D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes]; - D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes]; - D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes]; - D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes]; - D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes]; - D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes]; - D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes]; - D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes]; - D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes]; - D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes]; - D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes]; - D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes]; - D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes]; - D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes]; - D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes]; - D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes]; - D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes]; - } - else - { - D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes]; - D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes]; - D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes]; - D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes]; - D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes]; - D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes]; - D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes]; - D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes]; - D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes]; - D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes]; - D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes]; - D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes]; - D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes]; - D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes]; - D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes]; - D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes]; - D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes]; - D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes]; - 
D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes]; - D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes]; - D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes]; - D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes]; - D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes]; - D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes]; - D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes]; - D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes]; - D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes]; - } - //////////////////////////////////////////////////////////////////////////////// - const unsigned x = threadIdx.x; // Globaler x-Index - const unsigned y = blockIdx.x; // Globaler y-Index - const unsigned z = blockIdx.y; // Globaler z-Index - const unsigned nx = blockDim.x; - const unsigned ny = gridDim.x; - const unsigned k = nx*(ny*z + y) + x; - ////////////////////////////////////////////////////////////////////////// - if(k<numberOfLBnodes) - { - ////////////////////////////////////////////////////////////////////////// - //index - unsigned int kzero= k; - unsigned int ke = k; - unsigned int kw = neighborX[k]; - unsigned int kn = k; - unsigned int ks = neighborY[k]; - unsigned int kt = k; - unsigned int kb = neighborZ[k]; - unsigned int ksw = neighborY[kw]; - unsigned int kne = k; - unsigned int kse = ks; - unsigned int knw = kw; - unsigned int kbw = neighborZ[kw]; - unsigned int kte = k; - unsigned int kbe = kb; - unsigned int ktw = kw; - unsigned int kbs = neighborZ[ks]; - unsigned int ktn = k; - unsigned int kbn = kb; - unsigned int kts = ks; - unsigned int ktse = ks; - unsigned int kbnw = kbw; - unsigned int ktnw = kw; - unsigned int kbse = kbs; - unsigned int ktsw = ksw; - unsigned int kbne = kb; - unsigned int ktne = k; - unsigned int kbsw = neighborZ[ksw]; - ////////////////////////////////////////////////////////////////////////// - pressD[k] = c0o1; - rhoD[k] = c0o1; - vxD[k] = c0o1; - vyD[k] = c0o1; - vzD[k] = c0o1; - - if(geoD[k] == GEO_FLUID) - { - rhoD[k] = (D.f[DIR_P00])[ke ]+ (D.f[DIR_M00])[kw ]+ - 
(D.f[DIR_0P0])[kn ]+ (D.f[DIR_0M0])[ks ]+ - (D.f[DIR_00P])[kt ]+ (D.f[DIR_00M])[kb ]+ - (D.f[DIR_PP0])[kne ]+ (D.f[DIR_MM0])[ksw ]+ - (D.f[DIR_PM0])[kse ]+ (D.f[DIR_MP0])[knw ]+ - (D.f[DIR_P0P])[kte ]+ (D.f[DIR_M0M])[kbw ]+ - (D.f[DIR_P0M])[kbe ]+ (D.f[DIR_M0P])[ktw ]+ - (D.f[DIR_0PP])[ktn ]+ (D.f[DIR_0MM])[kbs ]+ - (D.f[DIR_0PM])[kbn ]+ (D.f[DIR_0MP])[kts ]+ - (D.f[DIR_000])[kzero]+ - (D.f[DIR_PPP])[ktne]+ (D.f[DIR_MMP])[ktsw]+ - (D.f[DIR_PMP])[ktse]+ (D.f[DIR_MPP])[ktnw]+ - (D.f[DIR_PPM])[kbne]+ (D.f[DIR_MMM])[kbsw]+ - (D.f[DIR_PMM])[kbse]+ (D.f[DIR_MPM])[kbnw]; - - vxD[k] = (D.f[DIR_P00])[ke ]- (D.f[DIR_M00])[kw ]+ - (D.f[DIR_PP0])[kne ]- (D.f[DIR_MM0])[ksw ]+ - (D.f[DIR_PM0])[kse ]- (D.f[DIR_MP0])[knw ]+ - (D.f[DIR_P0P])[kte ]- (D.f[DIR_M0M])[kbw ]+ - (D.f[DIR_P0M])[kbe ]- (D.f[DIR_M0P])[ktw ]+ - (D.f[DIR_PPP])[ktne]- (D.f[DIR_MMP])[ktsw]+ - (D.f[DIR_PMP])[ktse]- (D.f[DIR_MPP])[ktnw]+ - (D.f[DIR_PPM])[kbne]- (D.f[DIR_MMM])[kbsw]+ - (D.f[DIR_PMM])[kbse]- (D.f[DIR_MPM])[kbnw]; - - vyD[k] = (D.f[DIR_0P0])[kn ]- (D.f[DIR_0M0])[ks ]+ - (D.f[DIR_PP0])[kne ]- (D.f[DIR_MM0])[ksw ]- - (D.f[DIR_PM0])[kse ]+ (D.f[DIR_MP0])[knw ]+ - (D.f[DIR_0PP])[ktn ]- (D.f[DIR_0MM])[kbs ]+ - (D.f[DIR_0PM])[kbn ]- (D.f[DIR_0MP])[kts ]+ - (D.f[DIR_PPP])[ktne]- (D.f[DIR_MMP])[ktsw]- - (D.f[DIR_PMP])[ktse]+ (D.f[DIR_MPP])[ktnw]+ - (D.f[DIR_PPM])[kbne]- (D.f[DIR_MMM])[kbsw]- - (D.f[DIR_PMM])[kbse]+ (D.f[DIR_MPM])[kbnw]; - - vzD[k] = (D.f[DIR_00P])[kt ]- (D.f[DIR_00M])[kb ]+ - (D.f[DIR_P0P])[kte ]- (D.f[DIR_M0M])[kbw ]- - (D.f[DIR_P0M])[kbe ]+ (D.f[DIR_M0P])[ktw ]+ - (D.f[DIR_0PP])[ktn ]- (D.f[DIR_0MM])[kbs ]- - (D.f[DIR_0PM])[kbn ]+ (D.f[DIR_0MP])[kts ]+ - (D.f[DIR_PPP])[ktne]+ (D.f[DIR_MMP])[ktsw]+ - (D.f[DIR_PMP])[ktse]+ (D.f[DIR_MPP])[ktnw]- - (D.f[DIR_PPM])[kbne]- (D.f[DIR_MMM])[kbsw]- - (D.f[DIR_PMM])[kbse]- (D.f[DIR_MPM])[kbnw]; - - pressD[k] = ((D.f[DIR_P00])[ke ]+ (D.f[DIR_M00])[kw ]+ - (D.f[DIR_0P0])[kn ]+ (D.f[DIR_0M0])[ks ]+ - (D.f[DIR_00P])[kt ]+ (D.f[DIR_00M])[kb ]+ - 2.f*( - 
(D.f[DIR_PP0])[kne ]+ (D.f[DIR_MM0])[ksw ]+ - (D.f[DIR_PM0])[kse ]+ (D.f[DIR_MP0])[knw ]+ - (D.f[DIR_P0P])[kte ]+ (D.f[DIR_M0M])[kbw ]+ - (D.f[DIR_P0M])[kbe ]+ (D.f[DIR_M0P])[ktw ]+ - (D.f[DIR_0PP])[ktn ]+ (D.f[DIR_0MM])[kbs ]+ - (D.f[DIR_0PM])[kbn ]+ (D.f[DIR_0MP])[kts ])+ - 3.f*( - (D.f[DIR_PPP])[ktne]+ (D.f[DIR_MMP])[ktsw]+ - (D.f[DIR_PMP])[ktse]+ (D.f[DIR_MPP])[ktnw]+ - (D.f[DIR_PPM])[kbne]+ (D.f[DIR_MMM])[kbsw]+ - (D.f[DIR_PMM])[kbse]+ (D.f[DIR_MPM])[kbnw])- - rhoD[k]-(vxD[k] * vxD[k] + vyD[k] * vyD[k] + vzD[k] * vzD[k]) * (c1o1+c0o1*rhoD[k])) * c1o2+rhoD[k]; // times zero for incompressible case - //achtung op hart gesetzt Annahme op = 1 ; ^^^^(1.0/op-0.5)=0.5 - - } - } -} //////////////////////////////////////////////////////////////////////////////// __global__ void LBCalcMacCompSP27( - real *vxD, - real *vyD, - real *vzD, - real *rhoD, - real *pressD, - unsigned int *geoD, - unsigned int *neighborX, - unsigned int *neighborY, - unsigned int *neighborZ, - unsigned long long numberOfLBnodes, - real *distributions, - bool isEvenTimestep) + real *vxD, + real *vyD, + real *vzD, + real *rhoD, + real *pressD, + unsigned int *geoD, + unsigned int *neighborX, + unsigned int *neighborY, + unsigned int *neighborZ, + unsigned long long numberOfLBnodes, + real *distributions, + bool isEvenTimestep) { - const unsigned k = vf::gpu::getNodeIndex(); + //////////////////////////////////////////////////////////////////////////////// + //! - Get node index coordinates from threadIdx, blockIdx, blockDim and gridDim. + //! 
+ const unsigned nodeIndex = getNodeIndex(); - if(k >= numberOfLBnodes) + if(nodeIndex >= numberOfLBnodes) return; - pressD[k] = c0o1; - rhoD[k] = c0o1; - vxD[k] = c0o1; - vyD[k] = c0o1; - vzD[k] = c0o1; + pressD[nodeIndex] = c0o1; + rhoD[nodeIndex] = c0o1; + vxD[nodeIndex] = c0o1; + vyD[nodeIndex] = c0o1; + vzD[nodeIndex] = c0o1; - if (!vf::gpu::isValidFluidNode(geoD[k])) + if (!isValidFluidNode(geoD[nodeIndex])) return; - vf::gpu::DistributionWrapper distr_wrapper(distributions, numberOfLBnodes, isEvenTimestep, k, neighborX, neighborY, - neighborZ); + DistributionWrapper distr_wrapper(distributions, numberOfLBnodes, isEvenTimestep, nodeIndex, neighborX, neighborY, neighborZ); const auto &distribution = distr_wrapper.distribution; - rhoD[k] = vf::lbm::getDensity(distribution.f); - vxD[k] = vf::lbm::getCompressibleVelocityX1(distribution.f, rhoD[k]); - vyD[k] = vf::lbm::getCompressibleVelocityX2(distribution.f, rhoD[k]); - vzD[k] = vf::lbm::getCompressibleVelocityX3(distribution.f, rhoD[k]); - pressD[k] = vf::lbm::getPressure(distribution.f, rhoD[k], vxD[k], vyD[k], vzD[k]); + rhoD[nodeIndex] = vf::lbm::getDensity(distribution.f); + vxD[nodeIndex] = vf::lbm::getCompressibleVelocityX1(distribution.f, rhoD[nodeIndex]); + vyD[nodeIndex] = vf::lbm::getCompressibleVelocityX2(distribution.f, rhoD[nodeIndex]); + vzD[nodeIndex] = vf::lbm::getCompressibleVelocityX3(distribution.f, rhoD[nodeIndex]); + pressD[nodeIndex] = vf::lbm::getPressure(distribution.f, rhoD[nodeIndex], vxD[nodeIndex], vyD[nodeIndex], vzD[nodeIndex]); } @@ -339,206 +343,155 @@ __global__ void LBCalcMacCompSP27( //////////////////////////////////////////////////////////////////////////////// -__global__ void LBCalcMedSP27( real* vxD, - real* vyD, - real* vzD, - real* rhoD, - real* pressD, - unsigned int* geoD, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - unsigned long long numberOfLBnodes, - real* DD, - bool isEvenTimestep) +__global__ void LBCalcMedSP27( + real* vxD, 
+ real* vyD, + real* vzD, + real* rhoD, + real* pressD, + unsigned int* geoD, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + unsigned long long numberOfLBnodes, + real* distributions, + bool isEvenTimestep) { - Distributions27 D; - if (isEvenTimestep==true) - { - D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes]; - D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes]; - D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes]; - D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes]; - D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes]; - D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes]; - D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes]; - D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes]; - D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes]; - D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes]; - D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes]; - D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes]; - D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes]; - D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes]; - D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes]; - D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes]; - D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes]; - D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes]; - D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes]; - D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes]; - D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes]; - D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes]; - D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes]; - D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes]; - D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes]; - D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes]; - D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes]; - } - else - { - D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes]; - D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes]; - D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes]; - D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes]; - D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes]; - D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes]; - D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes]; - D.f[DIR_PP0] = 
&DD[DIR_MM0 * numberOfLBnodes]; - D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes]; - D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes]; - D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes]; - D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes]; - D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes]; - D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes]; - D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes]; - D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes]; - D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes]; - D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes]; - D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes]; - D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes]; - D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes]; - D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes]; - D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes]; - D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes]; - D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes]; - D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes]; - D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes]; - } - //////////////////////////////////////////////////////////////////////////////// - const unsigned x = threadIdx.x; // Globaler x-Index - const unsigned y = blockIdx.x; // Globaler y-Index - const unsigned z = blockIdx.y; // Globaler z-Index - - const unsigned nx = blockDim.x; - const unsigned ny = gridDim.x; - - const unsigned k = nx*(ny*z + y) + x; - ////////////////////////////////////////////////////////////////////////// - - if(k<numberOfLBnodes) - { - ////////////////////////////////////////////////////////////////////////// - //index - unsigned int kzero= k; - unsigned int ke = k; - unsigned int kw = neighborX[k]; - unsigned int kn = k; - unsigned int ks = neighborY[k]; - unsigned int kt = k; - unsigned int kb = neighborZ[k]; - unsigned int ksw = neighborY[kw]; - unsigned int kne = k; - unsigned int kse = ks; - unsigned int knw = kw; - unsigned int kbw = neighborZ[kw]; - unsigned int kte = k; - unsigned int kbe = kb; - unsigned int ktw = kw; - unsigned int kbs = neighborZ[ks]; - unsigned int ktn = k; - 
unsigned int kbn = kb; - unsigned int kts = ks; - unsigned int ktse = ks; - unsigned int kbnw = kbw; - unsigned int ktnw = kw; - unsigned int kbse = kbs; - unsigned int ktsw = ksw; - unsigned int kbne = kb; - unsigned int ktne = k; - unsigned int kbsw = neighborZ[ksw]; - ////////////////////////////////////////////////////////////////////////// - real PRESS = pressD[k]; - real RHO = rhoD[k]; - real VX = vxD[k]; - real VY = vyD[k]; - real VZ = vzD[k]; - ////////////////////////////////////////////////////////////////////////// - pressD[k] = c0o1; - rhoD[k] = c0o1; - vxD[k] = c0o1; - vyD[k] = c0o1; - vzD[k] = c0o1; - - if(geoD[k] == GEO_FLUID) - { - rhoD[k] = (D.f[DIR_P00])[ke ]+ (D.f[DIR_M00])[kw ]+ - (D.f[DIR_0P0])[kn ]+ (D.f[DIR_0M0])[ks ]+ - (D.f[DIR_00P])[kt ]+ (D.f[DIR_00M])[kb ]+ - (D.f[DIR_PP0])[kne ]+ (D.f[DIR_MM0])[ksw ]+ - (D.f[DIR_PM0])[kse ]+ (D.f[DIR_MP0])[knw ]+ - (D.f[DIR_P0P])[kte ]+ (D.f[DIR_M0M])[kbw ]+ - (D.f[DIR_P0M])[kbe ]+ (D.f[DIR_M0P])[ktw ]+ - (D.f[DIR_0PP])[ktn ]+ (D.f[DIR_0MM])[kbs ]+ - (D.f[DIR_0PM])[kbn ]+ (D.f[DIR_0MP])[kts ]+ - (D.f[DIR_000])[kzero]+ - (D.f[DIR_PPP])[ktne]+ (D.f[DIR_MMP])[ktsw]+ - (D.f[DIR_PMP])[ktse]+ (D.f[DIR_MPP])[ktnw]+ - (D.f[DIR_PPM])[kbne]+ (D.f[DIR_MMM])[kbsw]+ - (D.f[DIR_PMM])[kbse]+ (D.f[DIR_MPM])[kbnw]+ - RHO; - - vxD[k] = (D.f[DIR_P00])[ke ]- (D.f[DIR_M00])[kw ]+ - (D.f[DIR_PP0])[kne ]- (D.f[DIR_MM0])[ksw ]+ - (D.f[DIR_PM0])[kse ]- (D.f[DIR_MP0])[knw ]+ - (D.f[DIR_P0P])[kte ]- (D.f[DIR_M0M])[kbw ]+ - (D.f[DIR_P0M])[kbe ]- (D.f[DIR_M0P])[ktw ]+ - (D.f[DIR_PPP])[ktne]- (D.f[DIR_MMP])[ktsw]+ - (D.f[DIR_PMP])[ktse]- (D.f[DIR_MPP])[ktnw]+ - (D.f[DIR_PPM])[kbne]- (D.f[DIR_MMM])[kbsw]+ - (D.f[DIR_PMM])[kbse]- (D.f[DIR_MPM])[kbnw]+ - VX; - - vyD[k] = (D.f[DIR_0P0])[kn ]- (D.f[DIR_0M0])[ks ]+ - (D.f[DIR_PP0])[kne ]- (D.f[DIR_MM0])[ksw ]- - (D.f[DIR_PM0])[kse ]+ (D.f[DIR_MP0])[knw ]+ - (D.f[DIR_0PP])[ktn ]- (D.f[DIR_0MM])[kbs ]+ - (D.f[DIR_0PM])[kbn ]- (D.f[DIR_0MP])[kts ]+ - (D.f[DIR_PPP])[ktne]- 
(D.f[DIR_MMP])[ktsw]- - (D.f[DIR_PMP])[ktse]+ (D.f[DIR_MPP])[ktnw]+ - (D.f[DIR_PPM])[kbne]- (D.f[DIR_MMM])[kbsw]- - (D.f[DIR_PMM])[kbse]+ (D.f[DIR_MPM])[kbnw]+ - VY; - - vzD[k] = (D.f[DIR_00P])[kt ]- (D.f[DIR_00M])[kb ]+ - (D.f[DIR_P0P])[kte ]- (D.f[DIR_M0M])[kbw ]- - (D.f[DIR_P0M])[kbe ]+ (D.f[DIR_M0P])[ktw ]+ - (D.f[DIR_0PP])[ktn ]- (D.f[DIR_0MM])[kbs ]- - (D.f[DIR_0PM])[kbn ]+ (D.f[DIR_0MP])[kts ]+ - (D.f[DIR_PPP])[ktne]+ (D.f[DIR_MMP])[ktsw]+ - (D.f[DIR_PMP])[ktse]+ (D.f[DIR_MPP])[ktnw]- - (D.f[DIR_PPM])[kbne]- (D.f[DIR_MMM])[kbsw]- - (D.f[DIR_PMM])[kbse]- (D.f[DIR_MPM])[kbnw]+ - VZ; - - pressD[k] = ((D.f[DIR_P00])[ke ]+ (D.f[DIR_M00])[kw ]+ - (D.f[DIR_0P0])[kn ]+ (D.f[DIR_0M0])[ks ]+ - (D.f[DIR_00P])[kt ]+ (D.f[DIR_00M])[kb ]+ - c2o1*( - (D.f[DIR_PP0])[kne ]+ (D.f[DIR_MM0])[ksw ]+ - (D.f[DIR_PM0])[kse ]+ (D.f[DIR_MP0])[knw ]+ - (D.f[DIR_P0P])[kte ]+ (D.f[DIR_M0M])[kbw ]+ - (D.f[DIR_P0M])[kbe ]+ (D.f[DIR_M0P])[ktw ]+ - (D.f[DIR_0PP])[ktn ]+ (D.f[DIR_0MM])[kbs ]+ - (D.f[DIR_0PM])[kbn ]+ (D.f[DIR_0MP])[kts ])+ - c3o1*( - (D.f[DIR_PPP])[ktne]+ (D.f[DIR_MMP])[ktsw]+ - (D.f[DIR_PMP])[ktse]+ (D.f[DIR_MPP])[ktnw]+ - (D.f[DIR_PPM])[kbne]+ (D.f[DIR_MMM])[kbsw]+ - (D.f[DIR_PMM])[kbse]+ (D.f[DIR_MPM])[kbnw])- - rhoD[k]-(vxD[k] * vxD[k] + vyD[k] * vyD[k] + vzD[k] * vzD[k]) * (c1o1+rhoD[k])) * c1o2+rhoD[k]+ - PRESS; - //achtung op hart gesetzt Annahme op = 1 ; ^^^^(1.0/op-0.5)=0.5 - } - } + //////////////////////////////////////////////////////////////////////////////// + //! - Get node index coordinates from threadIdx, blockIdx, blockDim and gridDim. + //! + const unsigned nodeIndex = getNodeIndex(); + + ////////////////////////////////////////////////////////////////////////// + if( nodeIndex < numberOfLBnodes ) + { + ////////////////////////////////////////////////////////////////////////// + //! - Read distributions: style of reading and writing the distributions from/to stored arrays dependent on + //! timestep is based on the esoteric twist algorithm \ref <a + //! 
href="https://doi.org/10.3390/computation5020019"><b>[ M. Geier et al. (2017), + //! DOI:10.3390/computation5020019 ]</b></a> + //! + Distributions27 dist; + getPointersToDistributions(dist, distributions, numberOfLBnodes, isEvenTimestep); + + ////////////////////////////////////////////////////////////////////////// + //index + unsigned int kzero= nodeIndex; + unsigned int ke = nodeIndex; + unsigned int kw = neighborX[nodeIndex]; + unsigned int kn = nodeIndex; + unsigned int ks = neighborY[nodeIndex]; + unsigned int kt = nodeIndex; + unsigned int kb = neighborZ[nodeIndex]; + unsigned int ksw = neighborY[kw]; + unsigned int kne = nodeIndex; + unsigned int kse = ks; + unsigned int knw = kw; + unsigned int kbw = neighborZ[kw]; + unsigned int kte = nodeIndex; + unsigned int kbe = kb; + unsigned int ktw = kw; + unsigned int kbs = neighborZ[ks]; + unsigned int ktn = nodeIndex; + unsigned int kbn = kb; + unsigned int kts = ks; + unsigned int ktse = ks; + unsigned int kbnw = kbw; + unsigned int ktnw = kw; + unsigned int kbse = kbs; + unsigned int ktsw = ksw; + unsigned int kbne = kb; + unsigned int ktne = nodeIndex; + unsigned int kbsw = neighborZ[ksw]; + ////////////////////////////////////////////////////////////////////////// + real PRESS = pressD[nodeIndex]; + real RHO = rhoD[nodeIndex]; + real VX = vxD[nodeIndex]; + real VY = vyD[nodeIndex]; + real VZ = vzD[nodeIndex]; + ////////////////////////////////////////////////////////////////////////// + pressD[nodeIndex] = c0o1; + rhoD[nodeIndex] = c0o1; + vxD[nodeIndex] = c0o1; + vyD[nodeIndex] = c0o1; + vzD[nodeIndex] = c0o1; + + if(geoD[nodeIndex] == GEO_FLUID) + { + rhoD[nodeIndex] = + (dist.f[DIR_P00])[ke ]+ (dist.f[DIR_M00])[kw ]+ + (dist.f[DIR_0P0])[kn ]+ (dist.f[DIR_0M0])[ks ]+ + (dist.f[DIR_00P])[kt ]+ (dist.f[DIR_00M])[kb ]+ + (dist.f[DIR_PP0])[kne ]+ (dist.f[DIR_MM0])[ksw ]+ + (dist.f[DIR_PM0])[kse ]+ (dist.f[DIR_MP0])[knw ]+ + (dist.f[DIR_P0P])[kte ]+ (dist.f[DIR_M0M])[kbw ]+ + (dist.f[DIR_P0M])[kbe ]+ 
(dist.f[DIR_M0P])[ktw ]+ + (dist.f[DIR_0PP])[ktn ]+ (dist.f[DIR_0MM])[kbs ]+ + (dist.f[DIR_0PM])[kbn ]+ (dist.f[DIR_0MP])[kts ]+ + (dist.f[DIR_000])[kzero]+ + (dist.f[DIR_PPP])[ktne]+ (dist.f[DIR_MMP])[ktsw]+ + (dist.f[DIR_PMP])[ktse]+ (dist.f[DIR_MPP])[ktnw]+ + (dist.f[DIR_PPM])[kbne]+ (dist.f[DIR_MMM])[kbsw]+ + (dist.f[DIR_PMM])[kbse]+ (dist.f[DIR_MPM])[kbnw]+ + RHO; + + vxD[nodeIndex] = + (dist.f[DIR_P00])[ke ]- (dist.f[DIR_M00])[kw ]+ + (dist.f[DIR_PP0])[kne ]- (dist.f[DIR_MM0])[ksw ]+ + (dist.f[DIR_PM0])[kse ]- (dist.f[DIR_MP0])[knw ]+ + (dist.f[DIR_P0P])[kte ]- (dist.f[DIR_M0M])[kbw ]+ + (dist.f[DIR_P0M])[kbe ]- (dist.f[DIR_M0P])[ktw ]+ + (dist.f[DIR_PPP])[ktne]- (dist.f[DIR_MMP])[ktsw]+ + (dist.f[DIR_PMP])[ktse]- (dist.f[DIR_MPP])[ktnw]+ + (dist.f[DIR_PPM])[kbne]- (dist.f[DIR_MMM])[kbsw]+ + (dist.f[DIR_PMM])[kbse]- (dist.f[DIR_MPM])[kbnw]+ + VX; + + vyD[nodeIndex] = + (dist.f[DIR_0P0])[kn ]- (dist.f[DIR_0M0])[ks ]+ + (dist.f[DIR_PP0])[kne ]- (dist.f[DIR_MM0])[ksw ]- + (dist.f[DIR_PM0])[kse ]+ (dist.f[DIR_MP0])[knw ]+ + (dist.f[DIR_0PP])[ktn ]- (dist.f[DIR_0MM])[kbs ]+ + (dist.f[DIR_0PM])[kbn ]- (dist.f[DIR_0MP])[kts ]+ + (dist.f[DIR_PPP])[ktne]- (dist.f[DIR_MMP])[ktsw]- + (dist.f[DIR_PMP])[ktse]+ (dist.f[DIR_MPP])[ktnw]+ + (dist.f[DIR_PPM])[kbne]- (dist.f[DIR_MMM])[kbsw]- + (dist.f[DIR_PMM])[kbse]+ (dist.f[DIR_MPM])[kbnw]+ + VY; + + vzD[nodeIndex] = + (dist.f[DIR_00P])[kt ]- (dist.f[DIR_00M])[kb ]+ + (dist.f[DIR_P0P])[kte ]- (dist.f[DIR_M0M])[kbw ]- + (dist.f[DIR_P0M])[kbe ]+ (dist.f[DIR_M0P])[ktw ]+ + (dist.f[DIR_0PP])[ktn ]- (dist.f[DIR_0MM])[kbs ]- + (dist.f[DIR_0PM])[kbn ]+ (dist.f[DIR_0MP])[kts ]+ + (dist.f[DIR_PPP])[ktne]+ (dist.f[DIR_MMP])[ktsw]+ + (dist.f[DIR_PMP])[ktse]+ (dist.f[DIR_MPP])[ktnw]- + (dist.f[DIR_PPM])[kbne]- (dist.f[DIR_MMM])[kbsw]- + (dist.f[DIR_PMM])[kbse]- (dist.f[DIR_MPM])[kbnw]+ + VZ; + + pressD[nodeIndex] = + ((dist.f[DIR_P00])[ke ]+ (dist.f[DIR_M00])[kw ]+ + (dist.f[DIR_0P0])[kn ]+ (dist.f[DIR_0M0])[ks ]+ + (dist.f[DIR_00P])[kt 
]+ (dist.f[DIR_00M])[kb ]+ + c2o1*( + (dist.f[DIR_PP0])[kne ]+ (dist.f[DIR_MM0])[ksw ]+ + (dist.f[DIR_PM0])[kse ]+ (dist.f[DIR_MP0])[knw ]+ + (dist.f[DIR_P0P])[kte ]+ (dist.f[DIR_M0M])[kbw ]+ + (dist.f[DIR_P0M])[kbe ]+ (dist.f[DIR_M0P])[ktw ]+ + (dist.f[DIR_0PP])[ktn ]+ (dist.f[DIR_0MM])[kbs ]+ + (dist.f[DIR_0PM])[kbn ]+ (dist.f[DIR_0MP])[kts ])+ + c3o1*( + (dist.f[DIR_PPP])[ktne]+ (dist.f[DIR_MMP])[ktsw]+ + (dist.f[DIR_PMP])[ktse]+ (dist.f[DIR_MPP])[ktnw]+ + (dist.f[DIR_PPM])[kbne]+ (dist.f[DIR_MMM])[kbsw]+ + (dist.f[DIR_PMM])[kbse]+ (dist.f[DIR_MPM])[kbnw])- + rhoD[nodeIndex]-(vxD[nodeIndex] * vxD[nodeIndex] + vyD[nodeIndex] * vyD[nodeIndex] + vzD[nodeIndex] * vzD[nodeIndex]) * (c1o1+rhoD[nodeIndex])) * c1o2+rhoD[nodeIndex]+ + PRESS; + //achtung op hart gesetzt Annahme op = 1 ; ^^^^(1.0/op-0.5)=0.5 + } + } } //////////////////////////////////////////////////////////////////////////////// @@ -563,259 +516,152 @@ __global__ void LBCalcMedSP27( real* vxD, //////////////////////////////////////////////////////////////////////////////// -__global__ void LBCalcMedCompSP27( real* vxD, - real* vyD, - real* vzD, - real* rhoD, - real* pressD, - unsigned int* geoD, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - unsigned long long numberOfLBnodes, - real* DD, - bool isEvenTimestep) +__global__ void LBCalcMedCompSP27( + real* vxD, + real* vyD, + real* vzD, + real* rhoD, + real* pressD, + unsigned int* geoD, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + unsigned long long numberOfLBnodes, + real* distributions, + bool isEvenTimestep) { - Distributions27 D; - if (isEvenTimestep==true) - { - D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes]; - D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes]; - D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes]; - D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes]; - D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes]; - D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes]; - D.f[DIR_PP0] = &DD[DIR_PP0 
* numberOfLBnodes]; - D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes]; - D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes]; - D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes]; - D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes]; - D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes]; - D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes]; - D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes]; - D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes]; - D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes]; - D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes]; - D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes]; - D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes]; - D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes]; - D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes]; - D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes]; - D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes]; - D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes]; - D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes]; - D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes]; - D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes]; - } - else - { - D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes]; - D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes]; - D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes]; - D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes]; - D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes]; - D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes]; - D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes]; - D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes]; - D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes]; - D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes]; - D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes]; - D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes]; - D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes]; - D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes]; - D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes]; - D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes]; - D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes]; - D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes]; - D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes]; - D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes]; - 
D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes]; - D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes]; - D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes]; - D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes]; - D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes]; - D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes]; - D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes]; - } - //////////////////////////////////////////////////////////////////////////////// - const unsigned x = threadIdx.x; // Globaler x-Index - const unsigned y = blockIdx.x; // Globaler y-Index - const unsigned z = blockIdx.y; // Globaler z-Index - - const unsigned nx = blockDim.x; - const unsigned ny = gridDim.x; - - const unsigned k = nx*(ny*z + y) + x; - ////////////////////////////////////////////////////////////////////////// - - if(k<numberOfLBnodes) - { - ////////////////////////////////////////////////////////////////////////// - //index - //unsigned int kzero= k; - unsigned int ke = k; - unsigned int kw = neighborX[k]; - unsigned int kn = k; - unsigned int ks = neighborY[k]; - unsigned int kt = k; - unsigned int kb = neighborZ[k]; - unsigned int ksw = neighborY[kw]; - unsigned int kne = k; - unsigned int kse = ks; - unsigned int knw = kw; - unsigned int kbw = neighborZ[kw]; - unsigned int kte = k; - unsigned int kbe = kb; - unsigned int ktw = kw; - unsigned int kbs = neighborZ[ks]; - unsigned int ktn = k; - unsigned int kbn = kb; - unsigned int kts = ks; - unsigned int ktse = ks; - unsigned int kbnw = kbw; - unsigned int ktnw = kw; - unsigned int kbse = kbs; - unsigned int ktsw = ksw; - unsigned int kbne = kb; - unsigned int ktne = k; - unsigned int kbsw = neighborZ[ksw]; - ////////////////////////////////////////////////////////////////////////// - real PRESS = pressD[k]; - real RHO = rhoD[k]; - real VX = vxD[k]; - real VY = vyD[k]; - real VZ = vzD[k]; - ////////////////////////////////////////////////////////////////////////// - pressD[k] = c0o1; - rhoD[k] = c0o1; - vxD[k] = c0o1; - vyD[k] = c0o1; - vzD[k] = c0o1; - - 
if(geoD[k] == GEO_FLUID) - { - real mfcbb = (D.f[DIR_P00])[k];//[ke ]; - real mfabb = (D.f[DIR_M00])[kw];//[kw ]; - real mfbcb = (D.f[DIR_0P0])[k];//[kn ]; - real mfbab = (D.f[DIR_0M0])[ks];//[ks ]; - real mfbbc = (D.f[DIR_00P])[k];//[kt ]; - real mfbba = (D.f[DIR_00M])[kb];//[kb ]; - real mfccb = (D.f[DIR_PP0])[k];//[kne ]; - real mfaab = (D.f[DIR_MM0])[ksw];//[ksw ]; - real mfcab = (D.f[DIR_PM0])[ks];//[kse ]; - real mfacb = (D.f[DIR_MP0])[kw];//[knw ]; - real mfcbc = (D.f[DIR_P0P])[k];//[kte ]; - real mfaba = (D.f[DIR_M0M])[kbw];//[kbw ]; - real mfcba = (D.f[DIR_P0M])[kb];//[kbe ]; - real mfabc = (D.f[DIR_M0P])[kw];//[ktw ]; - real mfbcc = (D.f[DIR_0PP])[k];//[ktn ]; - real mfbaa = (D.f[DIR_0MM])[kbs];//[kbs ]; - real mfbca = (D.f[DIR_0PM])[kb];//[kbn ]; - real mfbac = (D.f[DIR_0MP])[ks];//[kts ]; - real mfbbb = (D.f[DIR_000])[k];//[kzero]; - real mfccc = (D.f[DIR_PPP])[k];//[ktne ]; - real mfaac = (D.f[DIR_MMP])[ksw];//[ktsw ]; - real mfcac = (D.f[DIR_PMP])[ks];//[ktse ]; - real mfacc = (D.f[DIR_MPP])[kw];//[ktnw ]; - real mfcca = (D.f[DIR_PPM])[kb];//[kbne ]; - real mfaaa = (D.f[DIR_MMM])[kbsw];//[kbsw ]; - real mfcaa = (D.f[DIR_PMM])[kbs];//[kbse ]; - real mfaca = (D.f[DIR_MPM])[kbw];//[kbnw ]; - //////////////////////////////////////////////////////////////////////////////////// - real drho = - ((((mfccc + mfaaa) + (mfaca + mfcac)) + ((mfacc + mfcaa) + (mfaac + mfcca))) + - (((mfbac + mfbca) + (mfbaa + mfbcc)) + ((mfabc + mfcba) + (mfaba + mfcbc)) + ((mfacb + mfcab) + (mfaab + mfccb))) + - ((mfabb + mfcbb) + (mfbab + mfbcb) + (mfbba + mfbbc))) + mfbbb; - - real rho = c1o1 + drho; - - rhoD[k] = drho + RHO; - - vxD[k] = - (((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfcaa - mfacc) + (mfcca - mfaac))) + - (((mfcba - mfabc) + (mfcbc - mfaba)) + ((mfcab - mfacb) + (mfccb - mfaab))) + - (mfcbb - mfabb)) / rho) + VX; - vyD[k] = - (((((mfccc - mfaaa) + (mfaca - mfcac)) + ((mfacc - mfcaa) + (mfcca - mfaac))) + - (((mfbca - mfbac) + (mfbcc - mfbaa)) + ((mfacb - mfcab) + 
(mfccb - mfaab))) + - (mfbcb - mfbab)) / rho) + VY; - vzD[k] = - (((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfacc - mfcaa) + (mfaac - mfcca))) + - (((mfbac - mfbca) + (mfbcc - mfbaa)) + ((mfabc - mfcba) + (mfcbc - mfaba))) + - (mfbbc - mfbba)) / rho) + VZ; - - //rhoD[k] = - // (D.f[DIR_P00])[ke] + (D.f[DIR_M00])[kw] + - // (D.f[DIR_0P0])[kn] + (D.f[DIR_0M0])[ks] + - // (D.f[DIR_00P])[kt] + (D.f[DIR_00M])[kb] + - // (D.f[DIR_PP0])[kne] + (D.f[DIR_MM0])[ksw] + - // (D.f[DIR_PM0])[kse] + (D.f[DIR_MP0])[knw] + - // (D.f[DIR_P0P])[kte] + (D.f[DIR_M0M])[kbw] + - // (D.f[DIR_P0M])[kbe] + (D.f[DIR_M0P])[ktw] + - // (D.f[DIR_0PP])[ktn] + (D.f[DIR_0MM])[kbs] + - // (D.f[DIR_0PM])[kbn] + (D.f[DIR_0MP])[kts] + - // (D.f[DIR_000])[kzero] + - // (D.f[DIR_PPP])[ktne] + (D.f[DIR_MMP])[ktsw] + - // (D.f[DIR_PMP])[ktse] + (D.f[DIR_MPP])[ktnw] + - // (D.f[DIR_PPM])[kbne] + (D.f[DIR_MMM])[kbsw] + - // (D.f[DIR_PMM])[kbse] + (D.f[DIR_MPM])[kbnw];// +RHO; - - // vxD[k] = - //((D.f[DIR_P00])[ke ]- (D.f[DIR_M00])[kw ]+ - // (D.f[DIR_PP0])[kne ]- (D.f[DIR_MM0])[ksw ]+ - // (D.f[DIR_PM0])[kse ]- (D.f[DIR_MP0])[knw ]+ - // (D.f[DIR_P0P])[kte ]- (D.f[DIR_M0M])[kbw ]+ - // (D.f[DIR_P0M])[kbe ]- (D.f[DIR_M0P])[ktw ]+ - // (D.f[DIR_PPP])[ktne]- (D.f[DIR_MMP])[ktsw]+ - // (D.f[DIR_PMP])[ktse]- (D.f[DIR_MPP])[ktnw]+ - // (D.f[DIR_PPM])[kbne]- (D.f[DIR_MMM])[kbsw]+ - // (D.f[DIR_PMM])[kbse]- (D.f[DIR_MPM])[kbnw]) / (one + rhoD[k])+ - // VX; - - // vyD[k] = - //((D.f[DIR_0P0])[kn ]- (D.f[DIR_0M0])[ks ]+ - // (D.f[DIR_PP0])[kne ]- (D.f[DIR_MM0])[ksw ]- - // (D.f[DIR_PM0])[kse ]+ (D.f[DIR_MP0])[knw ]+ - // (D.f[DIR_0PP])[ktn ]- (D.f[DIR_0MM])[kbs ]+ - // (D.f[DIR_0PM])[kbn ]- (D.f[DIR_0MP])[kts ]+ - // (D.f[DIR_PPP])[ktne]- (D.f[DIR_MMP])[ktsw]- - // (D.f[DIR_PMP])[ktse]+ (D.f[DIR_MPP])[ktnw]+ - // (D.f[DIR_PPM])[kbne]- (D.f[DIR_MMM])[kbsw]- - // (D.f[DIR_PMM])[kbse]+ (D.f[DIR_MPM])[kbnw]) / (one + rhoD[k])+ - // VY; - - // vzD[k] = - //((D.f[DIR_00P])[kt ]- (D.f[DIR_00M])[kb ]+ - // 
(D.f[DIR_P0P])[kte ]- (D.f[DIR_M0M])[kbw ]- - // (D.f[DIR_P0M])[kbe ]+ (D.f[DIR_M0P])[ktw ]+ - // (D.f[DIR_0PP])[ktn ]- (D.f[DIR_0MM])[kbs ]- - // (D.f[DIR_0PM])[kbn ]+ (D.f[DIR_0MP])[kts ]+ - // (D.f[DIR_PPP])[ktne]+ (D.f[DIR_MMP])[ktsw]+ - // (D.f[DIR_PMP])[ktse]+ (D.f[DIR_MPP])[ktnw]- - // (D.f[DIR_PPM])[kbne]- (D.f[DIR_MMM])[kbsw]- - // (D.f[DIR_PMM])[kbse]- (D.f[DIR_MPM])[kbnw]) / (one + rhoD[k])+ - // VZ; - - pressD[k] = ((D.f[DIR_P00])[ke ]+ (D.f[DIR_M00])[kw ]+ - (D.f[DIR_0P0])[kn ]+ (D.f[DIR_0M0])[ks ]+ - (D.f[DIR_00P])[kt ]+ (D.f[DIR_00M])[kb ]+ - c2o1*( - (D.f[DIR_PP0])[kne ]+ (D.f[DIR_MM0])[ksw ]+ - (D.f[DIR_PM0])[kse ]+ (D.f[DIR_MP0])[knw ]+ - (D.f[DIR_P0P])[kte ]+ (D.f[DIR_M0M])[kbw ]+ - (D.f[DIR_P0M])[kbe ]+ (D.f[DIR_M0P])[ktw ]+ - (D.f[DIR_0PP])[ktn ]+ (D.f[DIR_0MM])[kbs ]+ - (D.f[DIR_0PM])[kbn ]+ (D.f[DIR_0MP])[kts ])+ - c3o1*( - (D.f[DIR_PPP])[ktne]+ (D.f[DIR_MMP])[ktsw]+ - (D.f[DIR_PMP])[ktse]+ (D.f[DIR_MPP])[ktnw]+ - (D.f[DIR_PPM])[kbne]+ (D.f[DIR_MMM])[kbsw]+ - (D.f[DIR_PMM])[kbse]+ (D.f[DIR_MPM])[kbnw])- - rhoD[k]-(vxD[k] * vxD[k] + vyD[k] * vyD[k] + vzD[k] * vzD[k]) * (c1o1+rhoD[k])) * c1o2+rhoD[k]+ - PRESS; - //achtung op hart gesetzt Annahme op = 1 ; ^^^^(1.0/op-0.5)=0.5 - } - } + //////////////////////////////////////////////////////////////////////////////// + //! - Get node index coordinates from threadIdx, blockIdx, blockDim and gridDim. + //! + const unsigned nodeIndex = getNodeIndex(); + + ////////////////////////////////////////////////////////////////////////// + if( nodeIndex < numberOfLBnodes ) + { + ////////////////////////////////////////////////////////////////////////// + //! - Read distributions: style of reading and writing the distributions from/to stored arrays dependent on + //! timestep is based on the esoteric twist algorithm \ref <a + //! href="https://doi.org/10.3390/computation5020019"><b>[ M. Geier et al. (2017), + //! DOI:10.3390/computation5020019 ]</b></a> + //! 
+ Distributions27 dist; + getPointersToDistributions(dist, distributions, numberOfLBnodes, isEvenTimestep); + + ////////////////////////////////////////////////////////////////////////// + //index + //unsigned int kzero= k; + unsigned int ke = nodeIndex; + unsigned int kw = neighborX[nodeIndex]; + unsigned int kn = nodeIndex; + unsigned int ks = neighborY[nodeIndex]; + unsigned int kt = nodeIndex; + unsigned int kb = neighborZ[nodeIndex]; + unsigned int ksw = neighborY[kw]; + unsigned int kne = nodeIndex; + unsigned int kse = ks; + unsigned int knw = kw; + unsigned int kbw = neighborZ[kw]; + unsigned int kte = nodeIndex; + unsigned int kbe = kb; + unsigned int ktw = kw; + unsigned int kbs = neighborZ[ks]; + unsigned int ktn = nodeIndex; + unsigned int kbn = kb; + unsigned int kts = ks; + unsigned int ktse = ks; + unsigned int kbnw = kbw; + unsigned int ktnw = kw; + unsigned int kbse = kbs; + unsigned int ktsw = ksw; + unsigned int kbne = kb; + unsigned int ktne = nodeIndex; + unsigned int kbsw = neighborZ[ksw]; + ////////////////////////////////////////////////////////////////////////// + real PRESS = pressD[nodeIndex]; + real RHO = rhoD[nodeIndex]; + real VX = vxD[nodeIndex]; + real VY = vyD[nodeIndex]; + real VZ = vzD[nodeIndex]; + ////////////////////////////////////////////////////////////////////////// + pressD[nodeIndex] = c0o1; + rhoD[nodeIndex] = c0o1; + vxD[nodeIndex] = c0o1; + vyD[nodeIndex] = c0o1; + vzD[nodeIndex] = c0o1; + + if(geoD[nodeIndex] == GEO_FLUID) + { + real mfcbb = (dist.f[DIR_P00])[nodeIndex];//[ke ]; + real mfabb = (dist.f[DIR_M00])[kw];//[kw ]; + real mfbcb = (dist.f[DIR_0P0])[nodeIndex];//[kn ]; + real mfbab = (dist.f[DIR_0M0])[ks];//[ks ]; + real mfbbc = (dist.f[DIR_00P])[nodeIndex];//[kt ]; + real mfbba = (dist.f[DIR_00M])[kb];//[kb ]; + real mfccb = (dist.f[DIR_PP0])[nodeIndex];//[kne ]; + real mfaab = (dist.f[DIR_MM0])[ksw];//[ksw ]; + real mfcab = (dist.f[DIR_PM0])[ks];//[kse ]; + real mfacb = (dist.f[DIR_MP0])[kw];//[knw ]; + real 
mfcbc = (dist.f[DIR_P0P])[nodeIndex];//[kte ]; + real mfaba = (dist.f[DIR_M0M])[kbw];//[kbw ]; + real mfcba = (dist.f[DIR_P0M])[kb];//[kbe ]; + real mfabc = (dist.f[DIR_M0P])[kw];//[ktw ]; + real mfbcc = (dist.f[DIR_0PP])[nodeIndex];//[ktn ]; + real mfbaa = (dist.f[DIR_0MM])[kbs];//[kbs ]; + real mfbca = (dist.f[DIR_0PM])[kb];//[kbn ]; + real mfbac = (dist.f[DIR_0MP])[ks];//[kts ]; + real mfbbb = (dist.f[DIR_000])[nodeIndex];//[kzero]; + real mfccc = (dist.f[DIR_PPP])[nodeIndex];//[ktne ]; + real mfaac = (dist.f[DIR_MMP])[ksw];//[ktsw ]; + real mfcac = (dist.f[DIR_PMP])[ks];//[ktse ]; + real mfacc = (dist.f[DIR_MPP])[kw];//[ktnw ]; + real mfcca = (dist.f[DIR_PPM])[kb];//[kbne ]; + real mfaaa = (dist.f[DIR_MMM])[kbsw];//[kbsw ]; + real mfcaa = (dist.f[DIR_PMM])[kbs];//[kbse ]; + real mfaca = (dist.f[DIR_MPM])[kbw];//[kbnw ]; + //////////////////////////////////////////////////////////////////////////////////// + real drho = + ((((mfccc + mfaaa) + (mfaca + mfcac)) + ((mfacc + mfcaa) + (mfaac + mfcca))) + + (((mfbac + mfbca) + (mfbaa + mfbcc)) + ((mfabc + mfcba) + (mfaba + mfcbc)) + ((mfacb + mfcab) + (mfaab + mfccb))) + + ((mfabb + mfcbb) + (mfbab + mfbcb) + (mfbba + mfbbc))) + mfbbb; + + real rho = c1o1 + drho; + + rhoD[nodeIndex] = drho + RHO; + + vxD[nodeIndex] = + (((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfcaa - mfacc) + (mfcca - mfaac))) + + (((mfcba - mfabc) + (mfcbc - mfaba)) + ((mfcab - mfacb) + (mfccb - mfaab))) + + (mfcbb - mfabb)) / rho) + VX; + vyD[nodeIndex] = + (((((mfccc - mfaaa) + (mfaca - mfcac)) + ((mfacc - mfcaa) + (mfcca - mfaac))) + + (((mfbca - mfbac) + (mfbcc - mfbaa)) + ((mfacb - mfcab) + (mfccb - mfaab))) + + (mfbcb - mfbab)) / rho) + VY; + vzD[nodeIndex] = + (((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfacc - mfcaa) + (mfaac - mfcca))) + + (((mfbac - mfbca) + (mfbcc - mfbaa)) + ((mfabc - mfcba) + (mfcbc - mfaba))) + + (mfbbc - mfbba)) / rho) + VZ; + + pressD[nodeIndex] = + ((dist.f[DIR_P00])[ke ]+ (dist.f[DIR_M00])[kw ]+ + 
(dist.f[DIR_0P0])[kn ]+ (dist.f[DIR_0M0])[ks ]+ + (dist.f[DIR_00P])[kt ]+ (dist.f[DIR_00M])[kb ]+ + c2o1*( + (dist.f[DIR_PP0])[kne ]+ (dist.f[DIR_MM0])[ksw ]+ + (dist.f[DIR_PM0])[kse ]+ (dist.f[DIR_MP0])[knw ]+ + (dist.f[DIR_P0P])[kte ]+ (dist.f[DIR_M0M])[kbw ]+ + (dist.f[DIR_P0M])[kbe ]+ (dist.f[DIR_M0P])[ktw ]+ + (dist.f[DIR_0PP])[ktn ]+ (dist.f[DIR_0MM])[kbs ]+ + (dist.f[DIR_0PM])[kbn ]+ (dist.f[DIR_0MP])[kts ])+ + c3o1*( + (dist.f[DIR_PPP])[ktne]+ (dist.f[DIR_MMP])[ktsw]+ + (dist.f[DIR_PMP])[ktse]+ (dist.f[DIR_MPP])[ktnw]+ + (dist.f[DIR_PPM])[kbne]+ (dist.f[DIR_MMM])[kbsw]+ + (dist.f[DIR_PMM])[kbse]+ (dist.f[DIR_MPM])[kbnw])- + rhoD[nodeIndex]-(vxD[nodeIndex] * vxD[nodeIndex] + vyD[nodeIndex] * vyD[nodeIndex] + vzD[nodeIndex] * vzD[nodeIndex]) * (c1o1+rhoD[nodeIndex])) * c1o2+rhoD[nodeIndex]+ + PRESS; + //achtung op hart gesetzt Annahme op = 1 ; ^^^^(1.0/op-0.5)=0.5 + } + } } //////////////////////////////////////////////////////////////////////////////// @@ -841,309 +687,191 @@ __global__ void LBCalcMedCompSP27( real* vxD, //////////////////////////////////////////////////////////////////////////////// __global__ void LBCalcMedCompAD27( - real* vxD, - real* vyD, - real* vzD, - real* rhoD, - real* pressD, - real* concD, - unsigned int* geoD, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - unsigned long long numberOfLBnodes, - real* DD, - real* DD_AD, - bool isEvenTimestep) + real* vxD, + real* vyD, + real* vzD, + real* rhoD, + real* pressD, + real* concD, + unsigned int* geoD, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + unsigned long long numberOfLBnodes, + real* distributions, + real* distributionsAD, + bool isEvenTimestep) { - Distributions27 D; - if (isEvenTimestep == true) - { - D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes]; - D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes]; - D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes]; - D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes]; - D.f[DIR_00P] 
= &DD[DIR_00P * numberOfLBnodes]; - D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes]; - D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes]; - D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes]; - D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes]; - D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes]; - D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes]; - D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes]; - D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes]; - D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes]; - D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes]; - D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes]; - D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes]; - D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes]; - D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes]; - D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes]; - D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes]; - D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes]; - D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes]; - D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes]; - D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes]; - D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes]; - D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes]; - } - else - { - D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes]; - D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes]; - D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes]; - D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes]; - D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes]; - D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes]; - D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes]; - D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes]; - D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes]; - D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes]; - D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes]; - D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes]; - D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes]; - D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes]; - D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes]; - D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes]; - D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes]; - D.f[DIR_0PM] = &DD[DIR_0MP * 
numberOfLBnodes]; - D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes]; - D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes]; - D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes]; - D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes]; - D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes]; - D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes]; - D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes]; - D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes]; - D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes]; - } - //////////////////////////////////////////////////////////////////////////////// - Distributions27 Dad; - if (isEvenTimestep == true) - { - Dad.f[DIR_P00] = &DD_AD[DIR_P00 * numberOfLBnodes]; - Dad.f[DIR_M00] = &DD_AD[DIR_M00 * numberOfLBnodes]; - Dad.f[DIR_0P0] = &DD_AD[DIR_0P0 * numberOfLBnodes]; - Dad.f[DIR_0M0] = &DD_AD[DIR_0M0 * numberOfLBnodes]; - Dad.f[DIR_00P] = &DD_AD[DIR_00P * numberOfLBnodes]; - Dad.f[DIR_00M] = &DD_AD[DIR_00M * numberOfLBnodes]; - Dad.f[DIR_PP0] = &DD_AD[DIR_PP0 * numberOfLBnodes]; - Dad.f[DIR_MM0] = &DD_AD[DIR_MM0 * numberOfLBnodes]; - Dad.f[DIR_PM0] = &DD_AD[DIR_PM0 * numberOfLBnodes]; - Dad.f[DIR_MP0] = &DD_AD[DIR_MP0 * numberOfLBnodes]; - Dad.f[DIR_P0P] = &DD_AD[DIR_P0P * numberOfLBnodes]; - Dad.f[DIR_M0M] = &DD_AD[DIR_M0M * numberOfLBnodes]; - Dad.f[DIR_P0M] = &DD_AD[DIR_P0M * numberOfLBnodes]; - Dad.f[DIR_M0P] = &DD_AD[DIR_M0P * numberOfLBnodes]; - Dad.f[DIR_0PP] = &DD_AD[DIR_0PP * numberOfLBnodes]; - Dad.f[DIR_0MM] = &DD_AD[DIR_0MM * numberOfLBnodes]; - Dad.f[DIR_0PM] = &DD_AD[DIR_0PM * numberOfLBnodes]; - Dad.f[DIR_0MP] = &DD_AD[DIR_0MP * numberOfLBnodes]; - Dad.f[DIR_000] = &DD_AD[DIR_000 * numberOfLBnodes]; - Dad.f[DIR_PPP] = &DD_AD[DIR_PPP * numberOfLBnodes]; - Dad.f[DIR_MMP] = &DD_AD[DIR_MMP * numberOfLBnodes]; - Dad.f[DIR_PMP] = &DD_AD[DIR_PMP * numberOfLBnodes]; - Dad.f[DIR_MPP] = &DD_AD[DIR_MPP * numberOfLBnodes]; - Dad.f[DIR_PPM] = &DD_AD[DIR_PPM * numberOfLBnodes]; - Dad.f[DIR_MMM] = &DD_AD[DIR_MMM * numberOfLBnodes]; - Dad.f[DIR_PMM] = &DD_AD[DIR_PMM * 
numberOfLBnodes]; - Dad.f[DIR_MPM] = &DD_AD[DIR_MPM * numberOfLBnodes]; - } - else - { - Dad.f[DIR_M00] = &DD_AD[DIR_P00 * numberOfLBnodes]; - Dad.f[DIR_P00] = &DD_AD[DIR_M00 * numberOfLBnodes]; - Dad.f[DIR_0M0] = &DD_AD[DIR_0P0 * numberOfLBnodes]; - Dad.f[DIR_0P0] = &DD_AD[DIR_0M0 * numberOfLBnodes]; - Dad.f[DIR_00M] = &DD_AD[DIR_00P * numberOfLBnodes]; - Dad.f[DIR_00P] = &DD_AD[DIR_00M * numberOfLBnodes]; - Dad.f[DIR_MM0] = &DD_AD[DIR_PP0 * numberOfLBnodes]; - Dad.f[DIR_PP0] = &DD_AD[DIR_MM0 * numberOfLBnodes]; - Dad.f[DIR_MP0] = &DD_AD[DIR_PM0 * numberOfLBnodes]; - Dad.f[DIR_PM0] = &DD_AD[DIR_MP0 * numberOfLBnodes]; - Dad.f[DIR_M0M] = &DD_AD[DIR_P0P * numberOfLBnodes]; - Dad.f[DIR_P0P] = &DD_AD[DIR_M0M * numberOfLBnodes]; - Dad.f[DIR_M0P] = &DD_AD[DIR_P0M * numberOfLBnodes]; - Dad.f[DIR_P0M] = &DD_AD[DIR_M0P * numberOfLBnodes]; - Dad.f[DIR_0MM] = &DD_AD[DIR_0PP * numberOfLBnodes]; - Dad.f[DIR_0PP] = &DD_AD[DIR_0MM * numberOfLBnodes]; - Dad.f[DIR_0MP] = &DD_AD[DIR_0PM * numberOfLBnodes]; - Dad.f[DIR_0PM] = &DD_AD[DIR_0MP * numberOfLBnodes]; - Dad.f[DIR_000] = &DD_AD[DIR_000 * numberOfLBnodes]; - Dad.f[DIR_PPP] = &DD_AD[DIR_MMM * numberOfLBnodes]; - Dad.f[DIR_MMP] = &DD_AD[DIR_PPM * numberOfLBnodes]; - Dad.f[DIR_PMP] = &DD_AD[DIR_MPM * numberOfLBnodes]; - Dad.f[DIR_MPP] = &DD_AD[DIR_PMM * numberOfLBnodes]; - Dad.f[DIR_PPM] = &DD_AD[DIR_MMP * numberOfLBnodes]; - Dad.f[DIR_MMM] = &DD_AD[DIR_PPP * numberOfLBnodes]; - Dad.f[DIR_PMM] = &DD_AD[DIR_MPP * numberOfLBnodes]; - Dad.f[DIR_MPM] = &DD_AD[DIR_PMP * numberOfLBnodes]; - } - //////////////////////////////////////////////////////////////////////////////// - const unsigned x = threadIdx.x; // Globaler x-Index - const unsigned y = blockIdx.x; // Globaler y-Index - const unsigned z = blockIdx.y; // Globaler z-Index - - const unsigned nx = blockDim.x; - const unsigned ny = gridDim.x; - - const unsigned k = nx*(ny*z + y) + x; - ////////////////////////////////////////////////////////////////////////// - - if (k < 
numberOfLBnodes) - { - ////////////////////////////////////////////////////////////////////////// - //index - //unsigned int kzero = k; - unsigned int ke = k; - unsigned int kw = neighborX[k]; - unsigned int kn = k; - unsigned int ks = neighborY[k]; - unsigned int kt = k; - unsigned int kb = neighborZ[k]; - unsigned int ksw = neighborY[kw]; - unsigned int kne = k; - unsigned int kse = ks; - unsigned int knw = kw; - unsigned int kbw = neighborZ[kw]; - unsigned int kte = k; - unsigned int kbe = kb; - unsigned int ktw = kw; - unsigned int kbs = neighborZ[ks]; - unsigned int ktn = k; - unsigned int kbn = kb; - unsigned int kts = ks; - unsigned int ktse = ks; - unsigned int kbnw = kbw; - unsigned int ktnw = kw; - unsigned int kbse = kbs; - unsigned int ktsw = ksw; - unsigned int kbne = kb; - unsigned int ktne = k; - unsigned int kbsw = neighborZ[ksw]; - ////////////////////////////////////////////////////////////////////////// - real CONC = concD[k]; - real PRESS = pressD[k]; - real RHO = rhoD[k]; - real VX = vxD[k]; - real VY = vyD[k]; - real VZ = vzD[k]; - ////////////////////////////////////////////////////////////////////////// - concD[k] = c0o1; - pressD[k] = c0o1; - rhoD[k] = c0o1; - vxD[k] = c0o1; - vyD[k] = c0o1; - vzD[k] = c0o1; - - if (geoD[k] == GEO_FLUID) - { - real mfcbb = (D.f[DIR_P00])[k];//[ke ]; - real mfabb = (D.f[DIR_M00])[kw];//[kw ]; - real mfbcb = (D.f[DIR_0P0])[k];//[kn ]; - real mfbab = (D.f[DIR_0M0])[ks];//[ks ]; - real mfbbc = (D.f[DIR_00P])[k];//[kt ]; - real mfbba = (D.f[DIR_00M])[kb];//[kb ]; - real mfccb = (D.f[DIR_PP0])[k];//[kne ]; - real mfaab = (D.f[DIR_MM0])[ksw];//[ksw ]; - real mfcab = (D.f[DIR_PM0])[ks];//[kse ]; - real mfacb = (D.f[DIR_MP0])[kw];//[knw ]; - real mfcbc = (D.f[DIR_P0P])[k];//[kte ]; - real mfaba = (D.f[DIR_M0M])[kbw];//[kbw ]; - real mfcba = (D.f[DIR_P0M])[kb];//[kbe ]; - real mfabc = (D.f[DIR_M0P])[kw];//[ktw ]; - real mfbcc = (D.f[DIR_0PP])[k];//[ktn ]; - real mfbaa = (D.f[DIR_0MM])[kbs];//[kbs ]; - real mfbca = 
(D.f[DIR_0PM])[kb];//[kbn ]; - real mfbac = (D.f[DIR_0MP])[ks];//[kts ]; - real mfbbb = (D.f[DIR_000])[k];//[kzero]; - real mfccc = (D.f[DIR_PPP])[k];//[ktne ]; - real mfaac = (D.f[DIR_MMP])[ksw];//[ktsw ]; - real mfcac = (D.f[DIR_PMP])[ks];//[ktse ]; - real mfacc = (D.f[DIR_MPP])[kw];//[ktnw ]; - real mfcca = (D.f[DIR_PPM])[kb];//[kbne ]; - real mfaaa = (D.f[DIR_MMM])[kbsw];//[kbsw ]; - real mfcaa = (D.f[DIR_PMM])[kbs];//[kbse ]; - real mfaca = (D.f[DIR_MPM])[kbw];//[kbnw ]; - //////////////////////////////////////////////////////////////////////////////////// - real drho = - ((((mfccc + mfaaa) + (mfaca + mfcac)) + ((mfacc + mfcaa) + (mfaac + mfcca))) + - (((mfbac + mfbca) + (mfbaa + mfbcc)) + ((mfabc + mfcba) + (mfaba + mfcbc)) + ((mfacb + mfcab) + (mfaab + mfccb))) + - ((mfabb + mfcbb) + (mfbab + mfbcb) + (mfbba + mfbbc))) + mfbbb; - real rho = c1o1 + drho; - //////////////////////////////////////////////////////////////////////////////////// - - rhoD[k] = drho + RHO; - - vxD[k] = - (((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfcaa - mfacc) + (mfcca - mfaac))) + - (((mfcba - mfabc) + (mfcbc - mfaba)) + ((mfcab - mfacb) + (mfccb - mfaab))) + - (mfcbb - mfabb)) / rho) + VX; - - vyD[k] = - (((((mfccc - mfaaa) + (mfaca - mfcac)) + ((mfacc - mfcaa) + (mfcca - mfaac))) + - (((mfbca - mfbac) + (mfbcc - mfbaa)) + ((mfacb - mfcab) + (mfccb - mfaab))) + - (mfbcb - mfbab)) / rho) + VY; - - vzD[k] = - (((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfacc - mfcaa) + (mfaac - mfcca))) + - (((mfbac - mfbca) + (mfbcc - mfbaa)) + ((mfabc - mfcba) + (mfcbc - mfaba))) + - (mfbbc - mfbba)) / rho) + VZ; - - pressD[k] = - ((D.f[DIR_P00])[ke] + (D.f[DIR_M00])[kw] + - (D.f[DIR_0P0])[kn] + (D.f[DIR_0M0])[ks] + - (D.f[DIR_00P])[kt] + (D.f[DIR_00M])[kb] + - c2o1*( - (D.f[DIR_PP0])[kne] + (D.f[DIR_MM0])[ksw] + - (D.f[DIR_PM0])[kse] + (D.f[DIR_MP0])[knw] + - (D.f[DIR_P0P])[kte] + (D.f[DIR_M0M])[kbw] + - (D.f[DIR_P0M])[kbe] + (D.f[DIR_M0P])[ktw] + - (D.f[DIR_0PP])[ktn] + (D.f[DIR_0MM])[kbs] + - 
(D.f[DIR_0PM])[kbn] + (D.f[DIR_0MP])[kts]) + - c3o1*( - (D.f[DIR_PPP])[ktne] + (D.f[DIR_MMP])[ktsw] + - (D.f[DIR_PMP])[ktse] + (D.f[DIR_MPP])[ktnw] + - (D.f[DIR_PPM])[kbne] + (D.f[DIR_MMM])[kbsw] + - (D.f[DIR_PMM])[kbse] + (D.f[DIR_MPM])[kbnw]) - - rhoD[k] - (vxD[k] * vxD[k] + vyD[k] * vyD[k] + vzD[k] * vzD[k]) * (c1o1 + rhoD[k])) * c1o2 + rhoD[k] + - PRESS; - //achtung op hart gesetzt Annahme op = 1 ; ^^^^(1.0/op-0.5)=0.5 - ////////////////////////////////////////////////////////////////////////// - mfcbb = (Dad.f[DIR_P00])[k ]; - mfabb = (Dad.f[DIR_M00])[kw ]; - mfbcb = (Dad.f[DIR_0P0])[k ]; - mfbab = (Dad.f[DIR_0M0])[ks ]; - mfbbc = (Dad.f[DIR_00P])[k ]; - mfbba = (Dad.f[DIR_00M])[kb ]; - mfccb = (Dad.f[DIR_PP0])[k ]; - mfaab = (Dad.f[DIR_MM0])[ksw ]; - mfcab = (Dad.f[DIR_PM0])[ks ]; - mfacb = (Dad.f[DIR_MP0])[kw ]; - mfcbc = (Dad.f[DIR_P0P])[k ]; - mfaba = (Dad.f[DIR_M0M])[kbw ]; - mfcba = (Dad.f[DIR_P0M])[kb ]; - mfabc = (Dad.f[DIR_M0P])[kw ]; - mfbcc = (Dad.f[DIR_0PP])[k ]; - mfbaa = (Dad.f[DIR_0MM])[kbs ]; - mfbca = (Dad.f[DIR_0PM])[kb ]; - mfbac = (Dad.f[DIR_0MP])[ks ]; - mfbbb = (Dad.f[DIR_000])[k ]; - mfccc = (Dad.f[DIR_PPP])[k ]; - mfaac = (Dad.f[DIR_MMP])[ksw ]; - mfcac = (Dad.f[DIR_PMP])[ks ]; - mfacc = (Dad.f[DIR_MPP])[kw ]; - mfcca = (Dad.f[DIR_PPM])[kb ]; - mfaaa = (Dad.f[DIR_MMM])[kbsw]; - mfcaa = (Dad.f[DIR_PMM])[kbs ]; - mfaca = (Dad.f[DIR_MPM])[kbw ]; - ////////////////////////////////////////////////////////////////////////// - concD[k] = - ((((mfccc + mfaaa) + (mfaca + mfcac)) + ((mfacc + mfcaa) + (mfaac + mfcca))) + - (((mfbac + mfbca) + (mfbaa + mfbcc)) + ((mfabc + mfcba) + (mfaba + mfcbc)) + ((mfacb + mfcab) + (mfaab + mfccb))) + - ((mfabb + mfcbb) + (mfbab + mfbcb) + (mfbba + mfbbc))) + mfbbb + CONC; - } - } + //////////////////////////////////////////////////////////////////////////////// + //! - Get node index coordinates from threadIdx, blockIdx, blockDim and gridDim. + //! 
+ const unsigned nodeIndex = getNodeIndex(); + + ////////////////////////////////////////////////////////////////////////// + if ( nodeIndex < numberOfLBnodes ) + { + ////////////////////////////////////////////////////////////////////////// + //! - Read distributions: style of reading and writing the distributions from/to stored arrays dependent on + //! timestep is based on the esoteric twist algorithm \ref <a + //! href="https://doi.org/10.3390/computation5020019"><b>[ M. Geier et al. (2017), + //! DOI:10.3390/computation5020019 ]</b></a> + //! + Distributions27 dist, distAD; + getPointersToDistributions(dist, distributions, numberOfLBnodes, isEvenTimestep); + getPointersToDistributions(distAD, distributionsAD, numberOfLBnodes, isEvenTimestep); + + ////////////////////////////////////////////////////////////////////////// + //index + //unsigned int kzero = k; + unsigned int ke = nodeIndex; + unsigned int kw = neighborX[nodeIndex]; + unsigned int kn = nodeIndex; + unsigned int ks = neighborY[nodeIndex]; + unsigned int kt = nodeIndex; + unsigned int kb = neighborZ[nodeIndex]; + unsigned int ksw = neighborY[kw]; + unsigned int kne = nodeIndex; + unsigned int kse = ks; + unsigned int knw = kw; + unsigned int kbw = neighborZ[kw]; + unsigned int kte = nodeIndex; + unsigned int kbe = kb; + unsigned int ktw = kw; + unsigned int kbs = neighborZ[ks]; + unsigned int ktn = nodeIndex; + unsigned int kbn = kb; + unsigned int kts = ks; + unsigned int ktse = ks; + unsigned int kbnw = kbw; + unsigned int ktnw = kw; + unsigned int kbse = kbs; + unsigned int ktsw = ksw; + unsigned int kbne = kb; + unsigned int ktne = nodeIndex; + unsigned int kbsw = neighborZ[ksw]; + ////////////////////////////////////////////////////////////////////////// + real CONC = concD[nodeIndex]; + real PRESS = pressD[nodeIndex]; + real RHO = rhoD[nodeIndex]; + real VX = vxD[nodeIndex]; + real VY = vyD[nodeIndex]; + real VZ = vzD[nodeIndex]; + 
////////////////////////////////////////////////////////////////////////// + concD[nodeIndex] = c0o1; + pressD[nodeIndex] = c0o1; + rhoD[nodeIndex] = c0o1; + vxD[nodeIndex] = c0o1; + vyD[nodeIndex] = c0o1; + vzD[nodeIndex] = c0o1; + + if (geoD[nodeIndex] == GEO_FLUID) + { + real mfcbb = (dist.f[DIR_P00])[nodeIndex];//[ke ]; + real mfabb = (dist.f[DIR_M00])[kw];//[kw ]; + real mfbcb = (dist.f[DIR_0P0])[nodeIndex];//[kn ]; + real mfbab = (dist.f[DIR_0M0])[ks];//[ks ]; + real mfbbc = (dist.f[DIR_00P])[nodeIndex];//[kt ]; + real mfbba = (dist.f[DIR_00M])[kb];//[kb ]; + real mfccb = (dist.f[DIR_PP0])[nodeIndex];//[kne ]; + real mfaab = (dist.f[DIR_MM0])[ksw];//[ksw ]; + real mfcab = (dist.f[DIR_PM0])[ks];//[kse ]; + real mfacb = (dist.f[DIR_MP0])[kw];//[knw ]; + real mfcbc = (dist.f[DIR_P0P])[nodeIndex];//[kte ]; + real mfaba = (dist.f[DIR_M0M])[kbw];//[kbw ]; + real mfcba = (dist.f[DIR_P0M])[kb];//[kbe ]; + real mfabc = (dist.f[DIR_M0P])[kw];//[ktw ]; + real mfbcc = (dist.f[DIR_0PP])[nodeIndex];//[ktn ]; + real mfbaa = (dist.f[DIR_0MM])[kbs];//[kbs ]; + real mfbca = (dist.f[DIR_0PM])[kb];//[kbn ]; + real mfbac = (dist.f[DIR_0MP])[ks];//[kts ]; + real mfbbb = (dist.f[DIR_000])[nodeIndex];//[kzero]; + real mfccc = (dist.f[DIR_PPP])[nodeIndex];//[ktne ]; + real mfaac = (dist.f[DIR_MMP])[ksw];//[ktsw ]; + real mfcac = (dist.f[DIR_PMP])[ks];//[ktse ]; + real mfacc = (dist.f[DIR_MPP])[kw];//[ktnw ]; + real mfcca = (dist.f[DIR_PPM])[kb];//[kbne ]; + real mfaaa = (dist.f[DIR_MMM])[kbsw];//[kbsw ]; + real mfcaa = (dist.f[DIR_PMM])[kbs];//[kbse ]; + real mfaca = (dist.f[DIR_MPM])[kbw];//[kbnw ]; + //////////////////////////////////////////////////////////////////////////////////// + real drho = + ((((mfccc + mfaaa) + (mfaca + mfcac)) + ((mfacc + mfcaa) + (mfaac + mfcca))) + + (((mfbac + mfbca) + (mfbaa + mfbcc)) + ((mfabc + mfcba) + (mfaba + mfcbc)) + ((mfacb + mfcab) + (mfaab + mfccb))) + + ((mfabb + mfcbb) + (mfbab + mfbcb) + (mfbba + mfbbc))) + mfbbb; + real rho = c1o1 + 
drho; + //////////////////////////////////////////////////////////////////////////////////// + + rhoD[nodeIndex] = drho + RHO; + + vxD[nodeIndex] = + (((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfcaa - mfacc) + (mfcca - mfaac))) + + (((mfcba - mfabc) + (mfcbc - mfaba)) + ((mfcab - mfacb) + (mfccb - mfaab))) + + (mfcbb - mfabb)) / rho) + VX; + + vyD[nodeIndex] = + (((((mfccc - mfaaa) + (mfaca - mfcac)) + ((mfacc - mfcaa) + (mfcca - mfaac))) + + (((mfbca - mfbac) + (mfbcc - mfbaa)) + ((mfacb - mfcab) + (mfccb - mfaab))) + + (mfbcb - mfbab)) / rho) + VY; + + vzD[nodeIndex] = + (((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfacc - mfcaa) + (mfaac - mfcca))) + + (((mfbac - mfbca) + (mfbcc - mfbaa)) + ((mfabc - mfcba) + (mfcbc - mfaba))) + + (mfbbc - mfbba)) / rho) + VZ; + + pressD[nodeIndex] = + ((dist.f[DIR_P00])[ke] + (dist.f[DIR_M00])[kw] + + (dist.f[DIR_0P0])[kn] + (dist.f[DIR_0M0])[ks] + + (dist.f[DIR_00P])[kt] + (dist.f[DIR_00M])[kb] + + c2o1*( + (dist.f[DIR_PP0])[kne] + (dist.f[DIR_MM0])[ksw] + + (dist.f[DIR_PM0])[kse] + (dist.f[DIR_MP0])[knw] + + (dist.f[DIR_P0P])[kte] + (dist.f[DIR_M0M])[kbw] + + (dist.f[DIR_P0M])[kbe] + (dist.f[DIR_M0P])[ktw] + + (dist.f[DIR_0PP])[ktn] + (dist.f[DIR_0MM])[kbs] + + (dist.f[DIR_0PM])[kbn] + (dist.f[DIR_0MP])[kts]) + + c3o1*( + (dist.f[DIR_PPP])[ktne] + (dist.f[DIR_MMP])[ktsw] + + (dist.f[DIR_PMP])[ktse] + (dist.f[DIR_MPP])[ktnw] + + (dist.f[DIR_PPM])[kbne] + (dist.f[DIR_MMM])[kbsw] + + (dist.f[DIR_PMM])[kbse] + (dist.f[DIR_MPM])[kbnw]) - + rhoD[nodeIndex] - (vxD[nodeIndex] * vxD[nodeIndex] + vyD[nodeIndex] * vyD[nodeIndex] + vzD[nodeIndex] * vzD[nodeIndex]) * (c1o1 + rhoD[nodeIndex])) * c1o2 + rhoD[nodeIndex] + + PRESS; + //achtung op hart gesetzt Annahme op = 1 ; ^^^^(1.0/op-0.5)=0.5 + ////////////////////////////////////////////////////////////////////////// + mfcbb = (distAD.f[DIR_P00])[nodeIndex ]; + mfabb = (distAD.f[DIR_M00])[kw ]; + mfbcb = (distAD.f[DIR_0P0])[nodeIndex ]; + mfbab = (distAD.f[DIR_0M0])[ks ]; + mfbbc = 
(distAD.f[DIR_00P])[nodeIndex ]; + mfbba = (distAD.f[DIR_00M])[kb ]; + mfccb = (distAD.f[DIR_PP0])[nodeIndex ]; + mfaab = (distAD.f[DIR_MM0])[ksw ]; + mfcab = (distAD.f[DIR_PM0])[ks ]; + mfacb = (distAD.f[DIR_MP0])[kw ]; + mfcbc = (distAD.f[DIR_P0P])[nodeIndex ]; + mfaba = (distAD.f[DIR_M0M])[kbw ]; + mfcba = (distAD.f[DIR_P0M])[kb ]; + mfabc = (distAD.f[DIR_M0P])[kw ]; + mfbcc = (distAD.f[DIR_0PP])[nodeIndex ]; + mfbaa = (distAD.f[DIR_0MM])[kbs ]; + mfbca = (distAD.f[DIR_0PM])[kb ]; + mfbac = (distAD.f[DIR_0MP])[ks ]; + mfbbb = (distAD.f[DIR_000])[nodeIndex ]; + mfccc = (distAD.f[DIR_PPP])[nodeIndex ]; + mfaac = (distAD.f[DIR_MMP])[ksw ]; + mfcac = (distAD.f[DIR_PMP])[ks ]; + mfacc = (distAD.f[DIR_MPP])[kw ]; + mfcca = (distAD.f[DIR_PPM])[kb ]; + mfaaa = (distAD.f[DIR_MMM])[kbsw]; + mfcaa = (distAD.f[DIR_PMM])[kbs ]; + mfaca = (distAD.f[DIR_MPM])[kbw ]; + ////////////////////////////////////////////////////////////////////////// + concD[nodeIndex] = + ((((mfccc + mfaaa) + (mfaca + mfcac)) + ((mfacc + mfcaa) + (mfaac + mfcca))) + + (((mfbac + mfbca) + (mfbaa + mfbcc)) + ((mfabc + mfcba) + (mfaba + mfcbc)) + ((mfacb + mfcab) + (mfaab + mfccb))) + + ((mfabb + mfcbb) + (mfbab + mfbcb) + (mfbba + mfbbc))) + mfbbb + CONC; + } + } } //////////////////////////////////////////////////////////////////////////////// @@ -1168,54 +896,50 @@ __global__ void LBCalcMedCompAD27( //////////////////////////////////////////////////////////////////////////////// -__global__ void LBCalcMacMedSP27( real* vxD, - real* vyD, - real* vzD, - real* rhoD, - real* pressD, - unsigned int* geoD, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - unsigned int tdiff, - unsigned long long numberOfLBnodes, - bool isEvenTimestep) +__global__ void LBCalcMacMedSP27( + real* vxD, + real* vyD, + real* vzD, + real* rhoD, + real* pressD, + unsigned int* geoD, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + unsigned int tdiff, + unsigned long 
long numberOfLBnodes, + bool isEvenTimestep) { - //////////////////////////////////////////////////////////////////////////////// - const unsigned x = threadIdx.x; // Globaler x-Index - const unsigned y = blockIdx.x; // Globaler y-Index - const unsigned z = blockIdx.y; // Globaler z-Index - - const unsigned nx = blockDim.x; - const unsigned ny = gridDim.x; - - const unsigned k = nx*(ny*z + y) + x; - ////////////////////////////////////////////////////////////////////////// - - if(k<numberOfLBnodes) - { - ////////////////////////////////////////////////////////////////////////// - real PRESS = pressD[k]; - real RHO = rhoD[k]; - real VX = vxD[k]; - real VY = vyD[k]; - real VZ = vzD[k]; - ////////////////////////////////////////////////////////////////////////// - pressD[k] = c0o1; - rhoD[k] = c0o1; - vxD[k] = c0o1; - vyD[k] = c0o1; - vzD[k] = c0o1; - - if(geoD[k] == GEO_FLUID) - { - rhoD[k] = RHO / tdiff; - vxD[k] = VX / tdiff; - vyD[k] = VY / tdiff; - vzD[k] = VZ / tdiff; - pressD[k] = PRESS / tdiff; - } - } + //////////////////////////////////////////////////////////////////////////////// + //! - Get node index coordinates from threadIdx, blockIdx, blockDim and gridDim. + //! 
+ const unsigned nodeIndex = getNodeIndex(); + + ////////////////////////////////////////////////////////////////////////// + if(nodeIndex<numberOfLBnodes) + { + ////////////////////////////////////////////////////////////////////////// + real PRESS = pressD[nodeIndex]; + real RHO = rhoD[nodeIndex]; + real VX = vxD[nodeIndex]; + real VY = vyD[nodeIndex]; + real VZ = vzD[nodeIndex]; + ////////////////////////////////////////////////////////////////////////// + pressD[nodeIndex] = c0o1; + rhoD[nodeIndex] = c0o1; + vxD[nodeIndex] = c0o1; + vyD[nodeIndex] = c0o1; + vzD[nodeIndex] = c0o1; + + if(geoD[nodeIndex] == GEO_FLUID) + { + rhoD[nodeIndex] = RHO / tdiff; + vxD[nodeIndex] = VX / tdiff; + vyD[nodeIndex] = VY / tdiff; + vzD[nodeIndex] = VZ / tdiff; + pressD[nodeIndex] = PRESS / tdiff; + } + } } //////////////////////////////////////////////////////////////////////////////// @@ -1241,34 +965,29 @@ __global__ void LBCalcMacMedSP27( real* vxD, //////////////////////////////////////////////////////////////////////////////// __global__ void LBResetMedianValuesSP27( - real* vxD, - real* vyD, - real* vzD, - real* rhoD, - real* pressD, - unsigned long long numberOfLBnodes, - bool isEvenTimestep) + real* vxD, + real* vyD, + real* vzD, + real* rhoD, + real* pressD, + unsigned long long numberOfLBnodes, + bool isEvenTimestep) { - //////////////////////////////////////////////////////////////////////////////// - const unsigned x = threadIdx.x; // Globaler x-Index - const unsigned y = blockIdx.x; // Globaler y-Index - const unsigned z = blockIdx.y; // Globaler z-Index - - const unsigned nx = blockDim.x; - const unsigned ny = gridDim.x; - - const unsigned k = nx*(ny*z + y) + x; - ////////////////////////////////////////////////////////////////////////// - - if (k<numberOfLBnodes) - { - ////////////////////////////////////////////////////////////////////////// - pressD[k] = c0o1; - rhoD[k] = c0o1; - vxD[k] = c0o1; - vyD[k] = c0o1; - vzD[k] = c0o1; - } + 
//////////////////////////////////////////////////////////////////////////////// + //! - Get node index coordinates from threadIdx, blockIdx, blockDim and gridDim. + //! + const unsigned nodeIndex = getNodeIndex(); + + ////////////////////////////////////////////////////////////////////////// + if ( nodeIndex < numberOfLBnodes ) + { + ////////////////////////////////////////////////////////////////////////// + pressD[nodeIndex] = c0o1; + rhoD[nodeIndex] = c0o1; + vxD[nodeIndex] = c0o1; + vyD[nodeIndex] = c0o1; + vzD[nodeIndex] = c0o1; + } } //////////////////////////////////////////////////////////////////////////////// @@ -1294,36 +1013,30 @@ __global__ void LBResetMedianValuesSP27( //////////////////////////////////////////////////////////////////////////////// __global__ void LBResetMedianValuesAD27( - real* vxD, - real* vyD, - real* vzD, - real* rhoD, - real* pressD, - real* concD, - unsigned long long numberOfLBnodes, - bool isEvenTimestep) + real* vxD, + real* vyD, + real* vzD, + real* rhoD, + real* pressD, + real* concD, + unsigned long long numberOfLBnodes, + bool isEvenTimestep) { - //////////////////////////////////////////////////////////////////////////////// - const unsigned x = threadIdx.x; // Globaler x-Index - const unsigned y = blockIdx.x; // Globaler y-Index - const unsigned z = blockIdx.y; // Globaler z-Index - - const unsigned nx = blockDim.x; - const unsigned ny = gridDim.x; - - const unsigned k = nx*(ny*z + y) + x; - ////////////////////////////////////////////////////////////////////////// - - if (k < numberOfLBnodes) - { - ////////////////////////////////////////////////////////////////////////// - concD[k] = c0o1; - pressD[k] = c0o1; - rhoD[k] = c0o1; - vxD[k] = c0o1; - vyD[k] = c0o1; - vzD[k] = c0o1; - } + //////////////////////////////////////////////////////////////////////////////// + //! - Get node index coordinates from threadIdx, blockIdx, blockDim and gridDim. + //! 
+ const unsigned nodeIndex = getNodeIndex(); + + ////////////////////////////////////////////////////////////////////////// + if (nodeIndex < numberOfLBnodes) + { + concD[nodeIndex] = c0o1; + pressD[nodeIndex] = c0o1; + rhoD[nodeIndex] = c0o1; + vxD[nodeIndex] = c0o1; + vyD[nodeIndex] = c0o1; + vzD[nodeIndex] = c0o1; + } } //////////////////////////////////////////////////////////////////////////////// @@ -1348,177 +1061,121 @@ __global__ void LBResetMedianValuesAD27( //////////////////////////////////////////////////////////////////////////////// -__global__ void LBCalcMeasurePoints( real* vxMP, - real* vyMP, - real* vzMP, - real* rhoMP, - unsigned int* kMP, - unsigned int numberOfPointskMP, - unsigned int MPClockCycle, - unsigned int t, - unsigned int* geoD, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - unsigned long long numberOfLBnodes, - real* DD, - bool isEvenTimestep) +__global__ void LBCalcMeasurePoints( + real* vxMP, + real* vyMP, + real* vzMP, + real* rhoMP, + unsigned int* kMP, + unsigned int numberOfPointskMP, + unsigned int MPClockCycle, + unsigned int t, + unsigned int* geoD, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + unsigned long long numberOfLBnodes, + real* distributions, + bool isEvenTimestep) { - Distributions27 D; - if (isEvenTimestep==true) - { - D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes]; - D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes]; - D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes]; - D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes]; - D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes]; - D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes]; - D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes]; - D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes]; - D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes]; - D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes]; - D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes]; - D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes]; - D.f[DIR_P0M] = &DD[DIR_P0M * 
numberOfLBnodes]; - D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes]; - D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes]; - D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes]; - D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes]; - D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes]; - D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes]; - D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes]; - D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes]; - D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes]; - D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes]; - D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes]; - D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes]; - D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes]; - D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes]; - } - else - { - D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes]; - D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes]; - D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes]; - D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes]; - D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes]; - D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes]; - D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes]; - D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes]; - D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes]; - D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes]; - D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes]; - D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes]; - D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes]; - D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes]; - D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes]; - D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes]; - D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes]; - D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes]; - D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes]; - D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes]; - D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes]; - D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes]; - D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes]; - D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes]; - D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes]; - D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes]; - 
D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes]; - } - //////////////////////////////////////////////////////////////////////////////// - const unsigned x = threadIdx.x; // Globaler x-Index - const unsigned y = blockIdx.x; // Globaler y-Index - const unsigned z = blockIdx.y; // Globaler z-Index - - const unsigned nx = blockDim.x; - const unsigned ny = gridDim.x; - - const unsigned k = nx*(ny*z + y) + x; - ////////////////////////////////////////////////////////////////////////// - - if(k<numberOfPointskMP) - { - ////////////////////////////////////////////////////////////////////////// - //index - unsigned int kzero= kMP[k];//k; - unsigned int ke = kzero; - unsigned int kw = neighborX[kzero]; - unsigned int kn = kzero; - unsigned int ks = neighborY[kzero]; - unsigned int kt = kzero; - unsigned int kb = neighborZ[kzero]; - unsigned int ksw = neighborY[kw]; - unsigned int kne = kzero; - unsigned int kse = ks; - unsigned int knw = kw; - unsigned int kbw = neighborZ[kw]; - unsigned int kte = kzero; - unsigned int kbe = kb; - unsigned int ktw = kw; - unsigned int kbs = neighborZ[ks]; - unsigned int ktn = kzero; - unsigned int kbn = kb; - unsigned int kts = ks; - unsigned int ktse = ks; - unsigned int kbnw = kbw; - unsigned int ktnw = kw; - unsigned int kbse = kbs; - unsigned int ktsw = ksw; - unsigned int kbne = kb; - unsigned int ktne = kzero; - unsigned int kbsw = neighborZ[ksw]; - ////////////////////////////////////////////////////////////////////////// - unsigned int kMac = k*MPClockCycle + t; - ////////////////////////////////////////////////////////////////////////// - - if(geoD[kzero] == GEO_FLUID) - { - rhoMP[kMac]= (D.f[DIR_P00])[ke ]+ (D.f[DIR_M00])[kw ]+ - (D.f[DIR_0P0])[kn ]+ (D.f[DIR_0M0])[ks ]+ - (D.f[DIR_00P])[kt ]+ (D.f[DIR_00M])[kb ]+ - (D.f[DIR_PP0])[kne ]+ (D.f[DIR_MM0])[ksw ]+ - (D.f[DIR_PM0])[kse ]+ (D.f[DIR_MP0])[knw ]+ - (D.f[DIR_P0P])[kte ]+ (D.f[DIR_M0M])[kbw ]+ - (D.f[DIR_P0M])[kbe ]+ (D.f[DIR_M0P])[ktw ]+ - (D.f[DIR_0PP])[ktn ]+ 
(D.f[DIR_0MM])[kbs ]+ - (D.f[DIR_0PM])[kbn ]+ (D.f[DIR_0MP])[kts ]+ - (D.f[DIR_000])[kzero]+ - (D.f[DIR_PPP])[ktne]+ (D.f[DIR_MMP])[ktsw]+ - (D.f[DIR_PMP])[ktse]+ (D.f[DIR_MPP])[ktnw]+ - (D.f[DIR_PPM])[kbne]+ (D.f[DIR_MMM])[kbsw]+ - (D.f[DIR_PMM])[kbse]+ (D.f[DIR_MPM])[kbnw]; - - vxMP[kMac] = (D.f[DIR_P00])[ke ]- (D.f[DIR_M00])[kw ]+ - (D.f[DIR_PP0])[kne ]- (D.f[DIR_MM0])[ksw ]+ - (D.f[DIR_PM0])[kse ]- (D.f[DIR_MP0])[knw ]+ - (D.f[DIR_P0P])[kte ]- (D.f[DIR_M0M])[kbw ]+ - (D.f[DIR_P0M])[kbe ]- (D.f[DIR_M0P])[ktw ]+ - (D.f[DIR_PPP])[ktne]- (D.f[DIR_MMP])[ktsw]+ - (D.f[DIR_PMP])[ktse]- (D.f[DIR_MPP])[ktnw]+ - (D.f[DIR_PPM])[kbne]- (D.f[DIR_MMM])[kbsw]+ - (D.f[DIR_PMM])[kbse]- (D.f[DIR_MPM])[kbnw]; - - vyMP[kMac] = (D.f[DIR_0P0])[kn ]- (D.f[DIR_0M0])[ks ]+ - (D.f[DIR_PP0])[kne ]- (D.f[DIR_MM0])[ksw ]- - (D.f[DIR_PM0])[kse ]+ (D.f[DIR_MP0])[knw ]+ - (D.f[DIR_0PP])[ktn ]- (D.f[DIR_0MM])[kbs ]+ - (D.f[DIR_0PM])[kbn ]- (D.f[DIR_0MP])[kts ]+ - (D.f[DIR_PPP])[ktne]- (D.f[DIR_MMP])[ktsw]- - (D.f[DIR_PMP])[ktse]+ (D.f[DIR_MPP])[ktnw]+ - (D.f[DIR_PPM])[kbne]- (D.f[DIR_MMM])[kbsw]- - (D.f[DIR_PMM])[kbse]+ (D.f[DIR_MPM])[kbnw]; - - vzMP[kMac] = (D.f[DIR_00P])[kt ]- (D.f[DIR_00M])[kb ]+ - (D.f[DIR_P0P])[kte ]- (D.f[DIR_M0M])[kbw ]- - (D.f[DIR_P0M])[kbe ]+ (D.f[DIR_M0P])[ktw ]+ - (D.f[DIR_0PP])[ktn ]- (D.f[DIR_0MM])[kbs ]- - (D.f[DIR_0PM])[kbn ]+ (D.f[DIR_0MP])[kts ]+ - (D.f[DIR_PPP])[ktne]+ (D.f[DIR_MMP])[ktsw]+ - (D.f[DIR_PMP])[ktse]+ (D.f[DIR_MPP])[ktnw]- - (D.f[DIR_PPM])[kbne]- (D.f[DIR_MMM])[kbsw]- - (D.f[DIR_PMM])[kbse]- (D.f[DIR_MPM])[kbnw]; - } - } + //////////////////////////////////////////////////////////////////////////////// + //! - Get node index coordinates from threadIdx, blockIdx, blockDim and gridDim. + //! + const unsigned nodeIndex = getNodeIndex(); + + ////////////////////////////////////////////////////////////////////////// + if( nodeIndex < numberOfPointskMP ) + { + ////////////////////////////////////////////////////////////////////////// + //! 
- Read distributions: style of reading and writing the distributions from/to stored arrays dependent on + //! timestep is based on the esoteric twist algorithm \ref <a + //! href="https://doi.org/10.3390/computation5020019"><b>[ M. Geier et al. (2017), + //! DOI:10.3390/computation5020019 ]</b></a> + //! + Distributions27 dist; + getPointersToDistributions(dist, distributions, numberOfLBnodes, isEvenTimestep); + + ////////////////////////////////////////////////////////////////////////// + //index + unsigned int kzero= kMP[nodeIndex];//k; + unsigned int ke = kzero; + unsigned int kw = neighborX[kzero]; + unsigned int kn = kzero; + unsigned int ks = neighborY[kzero]; + unsigned int kt = kzero; + unsigned int kb = neighborZ[kzero]; + unsigned int ksw = neighborY[kw]; + unsigned int kne = kzero; + unsigned int kse = ks; + unsigned int knw = kw; + unsigned int kbw = neighborZ[kw]; + unsigned int kte = kzero; + unsigned int kbe = kb; + unsigned int ktw = kw; + unsigned int kbs = neighborZ[ks]; + unsigned int ktn = kzero; + unsigned int kbn = kb; + unsigned int kts = ks; + unsigned int ktse = ks; + unsigned int kbnw = kbw; + unsigned int ktnw = kw; + unsigned int kbse = kbs; + unsigned int ktsw = ksw; + unsigned int kbne = kb; + unsigned int ktne = kzero; + unsigned int kbsw = neighborZ[ksw]; + ////////////////////////////////////////////////////////////////////////// + unsigned int kMac = nodeIndex*MPClockCycle + t; + ////////////////////////////////////////////////////////////////////////// + + if(geoD[kzero] == GEO_FLUID) + { + rhoMP[kMac]= (dist.f[DIR_P00])[ke ]+ (dist.f[DIR_M00])[kw ]+ + (dist.f[DIR_0P0])[kn ]+ (dist.f[DIR_0M0])[ks ]+ + (dist.f[DIR_00P])[kt ]+ (dist.f[DIR_00M])[kb ]+ + (dist.f[DIR_PP0])[kne ]+ (dist.f[DIR_MM0])[ksw ]+ + (dist.f[DIR_PM0])[kse ]+ (dist.f[DIR_MP0])[knw ]+ + (dist.f[DIR_P0P])[kte ]+ (dist.f[DIR_M0M])[kbw ]+ + (dist.f[DIR_P0M])[kbe ]+ (dist.f[DIR_M0P])[ktw ]+ + (dist.f[DIR_0PP])[ktn ]+ (dist.f[DIR_0MM])[kbs ]+ + (dist.f[DIR_0PM])[kbn ]+ 
(dist.f[DIR_0MP])[kts ]+ + (dist.f[DIR_000])[kzero]+ + (dist.f[DIR_PPP])[ktne]+ (dist.f[DIR_MMP])[ktsw]+ + (dist.f[DIR_PMP])[ktse]+ (dist.f[DIR_MPP])[ktnw]+ + (dist.f[DIR_PPM])[kbne]+ (dist.f[DIR_MMM])[kbsw]+ + (dist.f[DIR_PMM])[kbse]+ (dist.f[DIR_MPM])[kbnw]; + + vxMP[kMac] = (dist.f[DIR_P00])[ke ]- (dist.f[DIR_M00])[kw ]+ + (dist.f[DIR_PP0])[kne ]- (dist.f[DIR_MM0])[ksw ]+ + (dist.f[DIR_PM0])[kse ]- (dist.f[DIR_MP0])[knw ]+ + (dist.f[DIR_P0P])[kte ]- (dist.f[DIR_M0M])[kbw ]+ + (dist.f[DIR_P0M])[kbe ]- (dist.f[DIR_M0P])[ktw ]+ + (dist.f[DIR_PPP])[ktne]- (dist.f[DIR_MMP])[ktsw]+ + (dist.f[DIR_PMP])[ktse]- (dist.f[DIR_MPP])[ktnw]+ + (dist.f[DIR_PPM])[kbne]- (dist.f[DIR_MMM])[kbsw]+ + (dist.f[DIR_PMM])[kbse]- (dist.f[DIR_MPM])[kbnw]; + + vyMP[kMac] = (dist.f[DIR_0P0])[kn ]- (dist.f[DIR_0M0])[ks ]+ + (dist.f[DIR_PP0])[kne ]- (dist.f[DIR_MM0])[ksw ]- + (dist.f[DIR_PM0])[kse ]+ (dist.f[DIR_MP0])[knw ]+ + (dist.f[DIR_0PP])[ktn ]- (dist.f[DIR_0MM])[kbs ]+ + (dist.f[DIR_0PM])[kbn ]- (dist.f[DIR_0MP])[kts ]+ + (dist.f[DIR_PPP])[ktne]- (dist.f[DIR_MMP])[ktsw]- + (dist.f[DIR_PMP])[ktse]+ (dist.f[DIR_MPP])[ktnw]+ + (dist.f[DIR_PPM])[kbne]- (dist.f[DIR_MMM])[kbsw]- + (dist.f[DIR_PMM])[kbse]+ (dist.f[DIR_MPM])[kbnw]; + + vzMP[kMac] = (dist.f[DIR_00P])[kt ]- (dist.f[DIR_00M])[kb ]+ + (dist.f[DIR_P0P])[kte ]- (dist.f[DIR_M0M])[kbw ]- + (dist.f[DIR_P0M])[kbe ]+ (dist.f[DIR_M0P])[ktw ]+ + (dist.f[DIR_0PP])[ktn ]- (dist.f[DIR_0MM])[kbs ]- + (dist.f[DIR_0PM])[kbn ]+ (dist.f[DIR_0MP])[kts ]+ + (dist.f[DIR_PPP])[ktne]+ (dist.f[DIR_MMP])[ktsw]+ + (dist.f[DIR_PMP])[ktse]+ (dist.f[DIR_MPP])[ktnw]- + (dist.f[DIR_PPM])[kbne]- (dist.f[DIR_MMM])[kbsw]- + (dist.f[DIR_PMM])[kbse]- (dist.f[DIR_MPM])[kbnw]; + } + } } //////////////////////////////////////////////////////////////////////////////// @@ -1559,40 +1216,36 @@ __global__ void LBCalcMeasurePoints( real* vxMP, //////////////////////////////////////////////////////////////////////////////// -__global__ void LBSetOutputWallVelocitySP27( 
real* vxD, - real* vyD, - real* vzD, - real* vxWall, - real* vyWall, - real* vzWall, - int numberOfWallNodes, - int* kWallNodes, - real* rhoD, - real* pressD, - unsigned int* geoD, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - unsigned long long numberOfLBnodes, - real* DD, - bool isEvenTimestep) +__global__ void LBSetOutputWallVelocitySP27( + real* vxD, + real* vyD, + real* vzD, + real* vxWall, + real* vyWall, + real* vzWall, + int numberOfWallNodes, + int* kWallNodes, + real* rhoD, + real* pressD, + unsigned int* geoD, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + unsigned long long numberOfLBnodes, + real* DD, + bool isEvenTimestep) { //////////////////////////////////////////////////////////////////////////////// - const unsigned x = threadIdx.x; // Globaler x-Index - const unsigned y = blockIdx.x; // Globaler y-Index - const unsigned z = blockIdx.y; // Globaler z-Index + //! - Get node index coordinates from threadIdx, blockIdx, blockDim and gridDim. + //! 
+ const unsigned nodeIndex = getNodeIndex(); - const unsigned nx = blockDim.x; - const unsigned ny = gridDim.x; - - const unsigned k = nx*(ny*z + y) + x; ////////////////////////////////////////////////////////////////////////// - - if(k<numberOfWallNodes) + if(nodeIndex<numberOfWallNodes) { ////////////////////////////////////////////////////////////////////////// //index - unsigned int KWN = kWallNodes[k]; + unsigned int KWN = kWallNodes[nodeIndex]; ////////////////////////////////////////////////////////////////////////// vxD[KWN] = 0.0;//vxWall[k]; vyD[KWN] = 0.0;//vyWall[k]; diff --git a/src/gpu/VirtualFluids_GPU/GPU/GPU_Kernels.cuh b/src/gpu/VirtualFluids_GPU/GPU/GPU_Kernels.cuh index 08dc872e00e501f799b03c22b6c14ec48178300b..3134db44346ee7f465a5c8f04505ee5749482fbf 100644 --- a/src/gpu/VirtualFluids_GPU/GPU/GPU_Kernels.cuh +++ b/src/gpu/VirtualFluids_GPU/GPU/GPU_Kernels.cuh @@ -936,6 +936,17 @@ __global__ void QSlipNormDeviceComp27(real* DD, unsigned long long numberOfLBnodes, bool isEvenTimestep); +__global__ void BBSlipDeviceComp27( + real* distributions, + int* subgridDistanceIndices, + real* subgridDistances, + unsigned int numberOfBCnodes, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + unsigned long long numberOfLBnodes, + bool isEvenTimestep); + // Stress BCs (wall model) __global__ void QStressDeviceComp27(real* DD, int* k_Q, diff --git a/src/gpu/VirtualFluids_GPU/GPU/GridScaling/scaleCF_compressible.cu b/src/gpu/VirtualFluids_GPU/GPU/GridScaling/scaleCF_compressible.cu index 16e22a195b3593c91881adf3f688897f53150da1..0724002cffa3a47820664851ffefd1c35dbe0235 100644 --- a/src/gpu/VirtualFluids_GPU/GPU/GridScaling/scaleCF_compressible.cu +++ b/src/gpu/VirtualFluids_GPU/GPU/GridScaling/scaleCF_compressible.cu @@ -32,12 +32,13 @@ //======================================================================================= #include "DataTypes.h" -#include "Kernel/Utilities/DistributionHelper.cuh" -#include 
"Kernel/Utilities/ChimeraTransformation.h" -#include "Kernel/Utilities/ScalingHelperFunctions.h" +#include "LBM/GPUHelperFunctions/KernelUtilities.h" +#include "LBM/GPUHelperFunctions/ChimeraTransformation.h" +#include "LBM/GPUHelperFunctions/ScalingUtilities.h" using namespace vf::lbm::constant; using namespace vf::lbm::dir; +using namespace vf::gpu; ////////////////////////////////////////////////////////////////////////// //! \brief Calculate the interpolated distributions on the fine destination nodes @@ -237,13 +238,13 @@ __global__ void scaleCF_compressible( OffCF offsetCF) { //////////////////////////////////////////////////////////////////////////////// - //! - Get the thread index coordinates from threadId_100, blockId_100, blockDim and gridDim. + //! - Get the node index coordinates from threadId_100, blockId_100, blockDim and gridDim. //! - const unsigned k_thread = vf::gpu::getNodeIndex(); + const unsigned nodeIndex = getNodeIndex(); ////////////////////////////////////////////////////////////////////////// //! - Return for non-interface node - if (k_thread >= numberOfInterfaceNodes) + if (nodeIndex >= numberOfInterfaceNodes) return; ////////////////////////////////////////////////////////////////////////// @@ -252,8 +253,9 @@ __global__ void scaleCF_compressible( //! href="https://doi.org/10.3390/computation5020019"><b>[ M. Geier et al. (2017), //! DOI:10.3390/computation5020019 ]</b></a> //! - Distributions27 distFine = vf::gpu::getDistributionReferences27(distributionsFine, numberOfLBnodesFine, true); - Distributions27 distCoarse = vf::gpu::getDistributionReferences27(distributionsCoarse, numberOfLBnodesCoarse, isEvenTimestep); + Distributions27 distFine, distCoarse; + getPointersToDistributions(distFine, distributionsFine, numberOfLBnodesFine, true); + getPointersToDistributions(distCoarse, distributionsCoarse, numberOfLBnodesCoarse, isEvenTimestep); //////////////////////////////////////////////////////////////////////////////// //! 
- declare local variables for source nodes @@ -289,7 +291,7 @@ __global__ void scaleCF_compressible( // source node BSW = MMM //////////////////////////////////////////////////////////////////////////////// // index of the base node and its neighbors - unsigned int k_base_000 = indicesCoarseMMM[k_thread]; + unsigned int k_base_000 = indicesCoarseMMM[nodeIndex]; unsigned int k_base_M00 = neighborXcoarse [k_base_000]; unsigned int k_base_0M0 = neighborYcoarse [k_base_000]; unsigned int k_base_00M = neighborZcoarse [k_base_000]; @@ -739,9 +741,9 @@ __global__ void scaleCF_compressible( //////////////////////////////////////////////////////////////////////////////// //! - Set the relative position of the offset cell {-1, 0, 1} //! - real xoff = offsetCF.xOffCF[k_thread]; - real yoff = offsetCF.yOffCF[k_thread]; - real zoff = offsetCF.zOffCF[k_thread]; + real xoff = offsetCF.xOffCF[nodeIndex]; + real yoff = offsetCF.yOffCF[nodeIndex]; + real zoff = offsetCF.zOffCF[nodeIndex]; real xoff_sq = xoff * xoff; real yoff_sq = yoff * yoff; @@ -904,7 +906,7 @@ __global__ void scaleCF_compressible( ////////////////////////////////////////////////////////////////////////// // index of the base node and its neighbors - k_base_000 = indicesFineMMM[k_thread]; + k_base_000 = indicesFineMMM[nodeIndex]; k_base_M00 = neighborXfine [k_base_000]; k_base_0M0 = neighborYfine [k_base_000]; k_base_00M = neighborZfine [k_base_000]; diff --git a/src/gpu/VirtualFluids_GPU/GPU/GridScaling/scaleFC_compressible.cu b/src/gpu/VirtualFluids_GPU/GPU/GridScaling/scaleFC_compressible.cu index 4a46dd5819069fb7ad936aa4cd8d249153bc6ca8..e7d999d108e59bca98bf87b813f9479f1c601266 100644 --- a/src/gpu/VirtualFluids_GPU/GPU/GridScaling/scaleFC_compressible.cu +++ b/src/gpu/VirtualFluids_GPU/GPU/GridScaling/scaleFC_compressible.cu @@ -31,12 +31,13 @@ //! 
\author Martin Schoenherr, Anna Wellmann //======================================================================================= -#include "Kernel/Utilities/DistributionHelper.cuh" -#include "Kernel/Utilities/ChimeraTransformation.h" -#include "Kernel/Utilities/ScalingHelperFunctions.h" +#include "LBM/GPUHelperFunctions/ChimeraTransformation.h" +#include "LBM/GPUHelperFunctions/KernelUtilities.h" +#include "LBM/GPUHelperFunctions/ScalingUtilities.h" using namespace vf::lbm::constant; using namespace vf::lbm::dir; +using namespace vf::gpu; ////////////////////////////////////////////////////////////////////////// //! \brief Interpolate from fine to coarse @@ -65,13 +66,13 @@ __global__ void scaleFC_compressible( OffFC offsetFC) { //////////////////////////////////////////////////////////////////////////////// - //! - Get the thread index coordinates from threadIdx, blockIdx, blockDim and gridDim. + //! - Get the node index coordinates from threadIdx, blockIdx, blockDim and gridDim. //! - const unsigned k_thread = vf::gpu::getNodeIndex(); + const unsigned nodeIndex = getNodeIndex(); ////////////////////////////////////////////////////////////////////////// //! - Return for non-interface node - if (k_thread >= numberOfInterfaceNodes) + if (nodeIndex >= numberOfInterfaceNodes) return; ////////////////////////////////////////////////////////////////////////// @@ -80,8 +81,9 @@ __global__ void scaleFC_compressible( //! href="https://doi.org/10.3390/computation5020019"><b>[ M. Geier et al. (2017), //! DOI:10.3390/computation5020019 ]</b></a> //! 
- Distributions27 distFine = vf::gpu::getDistributionReferences27(distributionsFine, numberOfLBnodesFine, true); - Distributions27 distCoarse = vf::gpu::getDistributionReferences27(distributionsCoarse, numberOfLBnodesCoarse, isEvenTimestep); + Distributions27 distFine, distCoarse; + getPointersToDistributions(distFine, distributionsFine, numberOfLBnodesFine, true); + getPointersToDistributions(distCoarse, distributionsCoarse, numberOfLBnodesCoarse, isEvenTimestep); //////////////////////////////////////////////////////////////////////////////// //! - declare local variables for source nodes @@ -117,7 +119,7 @@ __global__ void scaleFC_compressible( // source node BSW = MMM ////////////////////////////////////////////////////////////////////////// // index of the base node and its neighbors - unsigned int k_base_000 = indicesFineMMM[k_thread]; + unsigned int k_base_000 = indicesFineMMM[nodeIndex]; unsigned int k_base_M00 = neighborXfine [k_base_000]; unsigned int k_base_0M0 = neighborYfine [k_base_000]; unsigned int k_base_00M = neighborZfine [k_base_000]; @@ -278,19 +280,6 @@ __global__ void scaleFC_compressible( real c_000, c_100, c_010, c_001, c_200, c_020, c_002, c_110, c_101, c_011; real d_000, d_100, d_010, d_001, d_110, d_101, d_011; - //a_000 = (-kxxMyyFromfcNEQ_PPM - kxxMyyFromfcNEQ_PPP + kxxMyyFromfcNEQ_MPM + kxxMyyFromfcNEQ_MPP - - // kxxMyyFromfcNEQ_PMM - kxxMyyFromfcNEQ_PMP + kxxMyyFromfcNEQ_MMM + kxxMyyFromfcNEQ_MMP - - // kxxMzzFromfcNEQ_PPM - kxxMzzFromfcNEQ_PPP + kxxMzzFromfcNEQ_MPM + kxxMzzFromfcNEQ_MPP - - // kxxMzzFromfcNEQ_PMM - kxxMzzFromfcNEQ_PMP + kxxMzzFromfcNEQ_MMM + kxxMzzFromfcNEQ_MMP - - // c2o1 * kxyFromfcNEQ_PPM - c2o1 * kxyFromfcNEQ_PPP - c2o1 * kxyFromfcNEQ_MPM - c2o1 * kxyFromfcNEQ_MPP + - // c2o1 * kxyFromfcNEQ_PMM + c2o1 * kxyFromfcNEQ_PMP + c2o1 * kxyFromfcNEQ_MMM + c2o1 * kxyFromfcNEQ_MMP + - // c2o1 * kxzFromfcNEQ_PPM - c2o1 * kxzFromfcNEQ_PPP + c2o1 * kxzFromfcNEQ_MPM - c2o1 * kxzFromfcNEQ_MPP + - // c2o1 * kxzFromfcNEQ_PMM - 
c2o1 * kxzFromfcNEQ_PMP + c2o1 * kxzFromfcNEQ_MMM - c2o1 * kxzFromfcNEQ_MMP + - // c8o1 * vx1_PPM + c8o1 * vx1_PPP + c8o1 * vx1_MPM + c8o1 * vx1_MPP + c8o1 * vx1_PMM + c8o1 * vx1_PMP + - // c8o1 * vx1_MMM + c8o1 * vx1_MMP + c2o1 * vx2_PPM + c2o1 * vx2_PPP - c2o1 * vx2_MPM - c2o1 * vx2_MPP - - // c2o1 * vx2_PMM - c2o1 * vx2_PMP + c2o1 * vx2_MMM + c2o1 * vx2_MMP - c2o1 * vx3_PPM + c2o1 * vx3_PPP + - // c2o1 * vx3_MPM - c2o1 * vx3_MPP - c2o1 * vx3_PMM + c2o1 * vx3_PMP + c2o1 * vx3_MMM - c2o1 * vx3_MMP) / - // c64o1; a_000 = c1o64 * ( c2o1 * ( ((kxyFromfcNEQ_MMM - kxyFromfcNEQ_PPP) + (kxyFromfcNEQ_MMP - kxyFromfcNEQ_PPM)) + ((kxyFromfcNEQ_PMM - kxyFromfcNEQ_MPP) + (kxyFromfcNEQ_PMP - kxyFromfcNEQ_MPM)) + @@ -302,21 +291,6 @@ __global__ void scaleFC_compressible( ((kxxMyyFromfcNEQ_MPP - kxxMyyFromfcNEQ_PMM) + (kxxMyyFromfcNEQ_MPM - kxxMyyFromfcNEQ_PMP)) + ((kxxMzzFromfcNEQ_MMM - kxxMzzFromfcNEQ_PPP) + (kxxMzzFromfcNEQ_MMP - kxxMzzFromfcNEQ_PPM)) + ((kxxMzzFromfcNEQ_MPP - kxxMzzFromfcNEQ_PMM) + (kxxMzzFromfcNEQ_MPM - kxxMzzFromfcNEQ_PMP))); - - //b_000 = (c2o1 * kxxMyyFromfcNEQ_PPM + c2o1 * kxxMyyFromfcNEQ_PPP + c2o1 * kxxMyyFromfcNEQ_MPM + - // c2o1 * kxxMyyFromfcNEQ_MPP - c2o1 * kxxMyyFromfcNEQ_PMM - c2o1 * kxxMyyFromfcNEQ_PMP - - // c2o1 * kxxMyyFromfcNEQ_MMM - c2o1 * kxxMyyFromfcNEQ_MMP - kxxMzzFromfcNEQ_PPM - kxxMzzFromfcNEQ_PPP - - // kxxMzzFromfcNEQ_MPM - kxxMzzFromfcNEQ_MPP + kxxMzzFromfcNEQ_PMM + kxxMzzFromfcNEQ_PMP + - // kxxMzzFromfcNEQ_MMM + kxxMzzFromfcNEQ_MMP - c2o1 * kxyFromfcNEQ_PPM - c2o1 * kxyFromfcNEQ_PPP + - // c2o1 * kxyFromfcNEQ_MPM + c2o1 * kxyFromfcNEQ_MPP - c2o1 * kxyFromfcNEQ_PMM - c2o1 * kxyFromfcNEQ_PMP + - // c2o1 * kxyFromfcNEQ_MMM + c2o1 * kxyFromfcNEQ_MMP + c2o1 * kyzFromfcNEQ_PPM - c2o1 * kyzFromfcNEQ_PPP + - // c2o1 * kyzFromfcNEQ_MPM - c2o1 * kyzFromfcNEQ_MPP + c2o1 * kyzFromfcNEQ_PMM - c2o1 * kyzFromfcNEQ_PMP + - // c2o1 * kyzFromfcNEQ_MMM - c2o1 * kyzFromfcNEQ_MMP + c2o1 * vx1_PPM + c2o1 * vx1_PPP - c2o1 * vx1_MPM - - // c2o1 * 
vx1_MPP - c2o1 * vx1_PMM - c2o1 * vx1_PMP + c2o1 * vx1_MMM + c2o1 * vx1_MMP + c8o1 * vx2_PPM + - // c8o1 * vx2_PPP + c8o1 * vx2_MPM + c8o1 * vx2_MPP + c8o1 * vx2_PMM + c8o1 * vx2_PMP + c8o1 * vx2_MMM + - // c8o1 * vx2_MMP - c2o1 * vx3_PPM + c2o1 * vx3_PPP - c2o1 * vx3_MPM + c2o1 * vx3_MPP + c2o1 * vx3_PMM - - // c2o1 * vx3_PMP + c2o1 * vx3_MMM - c2o1 * vx3_MMP) / - // c64o1; b_000 = c1o64 * ( c2o1 * ( ((kxxMyyFromfcNEQ_PPP - kxxMyyFromfcNEQ_MMM) + (kxxMyyFromfcNEQ_PPM - kxxMyyFromfcNEQ_MMP)) + @@ -330,21 +304,6 @@ __global__ void scaleFC_compressible( c8o1 * (((vx2_PPP + vx2_MMM) + (vx2_PPM + vx2_MMP)) + ((vx2_MPP + vx2_PMM) + (vx2_MPM + vx2_PMP))) + ((kxxMzzFromfcNEQ_MMM - kxxMzzFromfcNEQ_PPP) + (kxxMzzFromfcNEQ_MMP - kxxMzzFromfcNEQ_PPM)) + ((kxxMzzFromfcNEQ_PMM - kxxMzzFromfcNEQ_MPP) + (kxxMzzFromfcNEQ_PMP - kxxMzzFromfcNEQ_MPM))); - - //c_000 = (kxxMyyFromfcNEQ_PPM - kxxMyyFromfcNEQ_PPP + kxxMyyFromfcNEQ_MPM - kxxMyyFromfcNEQ_MPP + - // kxxMyyFromfcNEQ_PMM - kxxMyyFromfcNEQ_PMP + kxxMyyFromfcNEQ_MMM - kxxMyyFromfcNEQ_MMP - - // c2o1 * kxxMzzFromfcNEQ_PPM + c2o1 * kxxMzzFromfcNEQ_PPP - c2o1 * kxxMzzFromfcNEQ_MPM + - // c2o1 * kxxMzzFromfcNEQ_MPP - c2o1 * kxxMzzFromfcNEQ_PMM + c2o1 * kxxMzzFromfcNEQ_PMP - - // c2o1 * kxxMzzFromfcNEQ_MMM + c2o1 * kxxMzzFromfcNEQ_MMP - c2o1 * kxzFromfcNEQ_PPM - - // c2o1 * kxzFromfcNEQ_PPP + c2o1 * kxzFromfcNEQ_MPM + c2o1 * kxzFromfcNEQ_MPP - c2o1 * kxzFromfcNEQ_PMM - - // c2o1 * kxzFromfcNEQ_PMP + c2o1 * kxzFromfcNEQ_MMM + c2o1 * kxzFromfcNEQ_MMP - c2o1 * kyzFromfcNEQ_PPM - - // c2o1 * kyzFromfcNEQ_PPP - c2o1 * kyzFromfcNEQ_MPM - c2o1 * kyzFromfcNEQ_MPP + c2o1 * kyzFromfcNEQ_PMM + - // c2o1 * kyzFromfcNEQ_PMP + c2o1 * kyzFromfcNEQ_MMM + c2o1 * kyzFromfcNEQ_MMP - c2o1 * vx1_PPM + - // c2o1 * vx1_PPP + c2o1 * vx1_MPM - c2o1 * vx1_MPP - c2o1 * vx1_PMM + c2o1 * vx1_PMP + c2o1 * vx1_MMM - - // c2o1 * vx1_MMP - c2o1 * vx2_PPM + c2o1 * vx2_PPP - c2o1 * vx2_MPM + c2o1 * vx2_MPP + c2o1 * vx2_PMM - - // c2o1 * vx2_PMP + c2o1 * vx2_MMM - 
c2o1 * vx2_MMP + c8o1 * vx3_PPM + c8o1 * vx3_PPP + c8o1 * vx3_MPM + - // c8o1 * vx3_MPP + c8o1 * vx3_PMM + c8o1 * vx3_PMP + c8o1 * vx3_MMM + c8o1 * vx3_MMP) / - // c64o1; c_000 = c1o64 * ( c2o1 * ( ((kxxMzzFromfcNEQ_PPP - kxxMzzFromfcNEQ_MMM) + (kxxMzzFromfcNEQ_MMP - kxxMzzFromfcNEQ_PPM)) + @@ -359,23 +318,10 @@ __global__ void scaleFC_compressible( ((kxxMyyFromfcNEQ_MMM - kxxMyyFromfcNEQ_PPP) + (kxxMyyFromfcNEQ_PPM - kxxMyyFromfcNEQ_MMP)) + ((kxxMyyFromfcNEQ_PMM - kxxMyyFromfcNEQ_MPP) + (kxxMyyFromfcNEQ_MPM - kxxMyyFromfcNEQ_PMP))); - //a_100 = (vx1_PPM + vx1_PPP - vx1_MPM - vx1_MPP + vx1_PMM + vx1_PMP - vx1_MMM - vx1_MMP) / c4o1; a_100 = c1o4 * (((vx1_PPP - vx1_MMM) + (vx1_PPM - vx1_MMP)) + ((vx1_PMM - vx1_MPP) + (vx1_PMP - vx1_MPM))); - - //b_100 = (vx2_PPM + vx2_PPP - vx2_MPM - vx2_MPP + vx2_PMM + vx2_PMP - vx2_MMM - vx2_MMP) / c4o1; b_100 = c1o4 * (((vx2_PPP - vx2_MMM) + (vx2_PPM - vx2_MMP)) + ((vx2_PMM - vx2_MPP) + (vx2_PMP - vx2_MPM))); - - //c_100 = (vx3_PPM + vx3_PPP - vx3_MPM - vx3_MPP + vx3_PMM + vx3_PMP - vx3_MMM - vx3_MMP) / c4o1; c_100 = c1o4 * (((vx3_PPP - vx3_MMM) + (vx3_PPM - vx3_MMP)) + ((vx3_PMM - vx3_MPP) + (vx3_PMP - vx3_MPM))); - //a_200 = (kxxMyyFromfcNEQ_PPM + kxxMyyFromfcNEQ_PPP - kxxMyyFromfcNEQ_MPM - kxxMyyFromfcNEQ_MPP + - // kxxMyyFromfcNEQ_PMM + kxxMyyFromfcNEQ_PMP - kxxMyyFromfcNEQ_MMM - kxxMyyFromfcNEQ_MMP + - // kxxMzzFromfcNEQ_PPM + kxxMzzFromfcNEQ_PPP - kxxMzzFromfcNEQ_MPM - kxxMzzFromfcNEQ_MPP + - // kxxMzzFromfcNEQ_PMM + kxxMzzFromfcNEQ_PMP - kxxMzzFromfcNEQ_MMM - kxxMzzFromfcNEQ_MMP + c2o1 * vx2_PPM + - // c2o1 * vx2_PPP - c2o1 * vx2_MPM - c2o1 * vx2_MPP - c2o1 * vx2_PMM - c2o1 * vx2_PMP + c2o1 * vx2_MMM + - // c2o1 * vx2_MMP - c2o1 * vx3_PPM + c2o1 * vx3_PPP + c2o1 * vx3_MPM - c2o1 * vx3_MPP - c2o1 * vx3_PMM + - // c2o1 * vx3_PMP + c2o1 * vx3_MMM - c2o1 * vx3_MMP) / - // c16o1; a_200 = c1o16 * ( c2o1 * ( ((vx2_PPP + vx2_MMM) + (vx2_PPM - vx2_MPP)) + ((vx2_MMP - vx2_PMM) - (vx2_MPM + vx2_PMP)) + @@ -384,54 +330,25 @@ __global__ 
void scaleFC_compressible( ((kxxMyyFromfcNEQ_PMM - kxxMyyFromfcNEQ_MPP) + (kxxMyyFromfcNEQ_PMP - kxxMyyFromfcNEQ_MPM)) + ((kxxMzzFromfcNEQ_PPP - kxxMzzFromfcNEQ_MMM) + (kxxMzzFromfcNEQ_PPM - kxxMzzFromfcNEQ_MMP)) + ((kxxMzzFromfcNEQ_PMM - kxxMzzFromfcNEQ_MPP) + (kxxMzzFromfcNEQ_PMP - kxxMzzFromfcNEQ_MPM))); - - //b_200 = (kxyFromfcNEQ_PPM + kxyFromfcNEQ_PPP - kxyFromfcNEQ_MPM - kxyFromfcNEQ_MPP + kxyFromfcNEQ_PMM + - // kxyFromfcNEQ_PMP - kxyFromfcNEQ_MMM - kxyFromfcNEQ_MMP - c2o1 * vx1_PPM - c2o1 * vx1_PPP + - // c2o1 * vx1_MPM + c2o1 * vx1_MPP + c2o1 * vx1_PMM + c2o1 * vx1_PMP - c2o1 * vx1_MMM - c2o1 * vx1_MMP) / - // c8o1; b_200 = c1o8 * ( c2o1 * ( -((vx1_PPP + vx1_MMM) + (vx1_PPM + vx1_MMP)) + ((vx1_MPP + vx1_PMM) + (vx1_MPM + vx1_PMP))) + ((kxyFromfcNEQ_PPP - kxyFromfcNEQ_MMM) + (kxyFromfcNEQ_PPM - kxyFromfcNEQ_MMP)) + ((kxyFromfcNEQ_PMM - kxyFromfcNEQ_MPP) + (kxyFromfcNEQ_PMP - kxyFromfcNEQ_MPM))); - - //c_200 = (kxzFromfcNEQ_PPM + kxzFromfcNEQ_PPP - kxzFromfcNEQ_MPM - kxzFromfcNEQ_MPP + kxzFromfcNEQ_PMM + - // kxzFromfcNEQ_PMP - kxzFromfcNEQ_MMM - kxzFromfcNEQ_MMP + c2o1 * vx1_PPM - c2o1 * vx1_PPP - c2o1 * vx1_MPM + - // c2o1 * vx1_MPP + c2o1 * vx1_PMM - c2o1 * vx1_PMP - c2o1 * vx1_MMM + c2o1 * vx1_MMP) / - // c8o1; c_200 = c1o8 * ( c2o1 * ( ((vx1_PPM + vx1_MMP) - (vx1_PPP + vx1_MMM)) + ((vx1_MPP + vx1_PMM) - (vx1_MPM + vx1_PMP))) + ((kxzFromfcNEQ_PPP - kxzFromfcNEQ_MMM) + (kxzFromfcNEQ_PPM - kxzFromfcNEQ_MMP)) + ((kxzFromfcNEQ_PMM - kxzFromfcNEQ_MPP) + (kxzFromfcNEQ_PMP - kxzFromfcNEQ_MPM))); - //a_010 = (vx1_PPM + vx1_PPP + vx1_MPM + vx1_MPP - vx1_PMM - vx1_PMP - vx1_MMM - vx1_MMP) / c4o1; a_010 = c1o4 * (((vx1_PPP - vx1_MMM) + (vx1_PPM - vx1_MMP)) + ((vx1_MPP - vx1_PMM) + (vx1_MPM - vx1_PMP))); - - //b_010 = (vx2_PPM + vx2_PPP + vx2_MPM + vx2_MPP - vx2_PMM - vx2_PMP - vx2_MMM - vx2_MMP) / c4o1; b_010 = c1o4 * (((vx2_PPP - vx2_MMM) + (vx2_PPM - vx2_MMP)) + ((vx2_MPP - vx2_PMM) + (vx2_MPM - vx2_PMP))); - - //c_010 = (vx3_PPM + vx3_PPP + vx3_MPM + vx3_MPP - 
vx3_PMM - vx3_PMP - vx3_MMM - vx3_MMP) / c4o1; c_010 = c1o4 * (((vx3_PPP - vx3_MMM) + (vx3_PPM - vx3_MMP)) + ((vx3_MPP - vx3_PMM) + (vx3_MPM - vx3_PMP))); - //a_020 = (kxyFromfcNEQ_PPM + kxyFromfcNEQ_PPP + kxyFromfcNEQ_MPM + kxyFromfcNEQ_MPP - kxyFromfcNEQ_PMM - - // kxyFromfcNEQ_PMP - kxyFromfcNEQ_MMM - kxyFromfcNEQ_MMP - c2o1 * vx2_PPM - c2o1 * vx2_PPP + - // c2o1 * vx2_MPM + c2o1 * vx2_MPP + c2o1 * vx2_PMM + c2o1 * vx2_PMP - c2o1 * vx2_MMM - c2o1 * vx2_MMP) / - // c8o1; a_020 = c1o8 * ( c2o1 * (-((vx2_PPP + vx2_MMM) + (vx2_MMP + vx2_PPM)) + ((vx2_MPP + vx2_PMM) + (vx2_MPM + vx2_PMP))) + ((kxyFromfcNEQ_PPP - kxyFromfcNEQ_MMM) + (kxyFromfcNEQ_PPM - kxyFromfcNEQ_MMP)) + ((kxyFromfcNEQ_MPP - kxyFromfcNEQ_PMM) + (kxyFromfcNEQ_MPM - kxyFromfcNEQ_PMP))); - - //b_020 = (-c2o1 * kxxMyyFromfcNEQ_PPM - c2o1 * kxxMyyFromfcNEQ_PPP - c2o1 * kxxMyyFromfcNEQ_MPM - - // c2o1 * kxxMyyFromfcNEQ_MPP + c2o1 * kxxMyyFromfcNEQ_PMM + c2o1 * kxxMyyFromfcNEQ_PMP + - // c2o1 * kxxMyyFromfcNEQ_MMM + c2o1 * kxxMyyFromfcNEQ_MMP + kxxMzzFromfcNEQ_PPM + kxxMzzFromfcNEQ_PPP + - // kxxMzzFromfcNEQ_MPM + kxxMzzFromfcNEQ_MPP - kxxMzzFromfcNEQ_PMM - kxxMzzFromfcNEQ_PMP - - // kxxMzzFromfcNEQ_MMM - kxxMzzFromfcNEQ_MMP + c2o1 * vx1_PPM + c2o1 * vx1_PPP - c2o1 * vx1_MPM - - // c2o1 * vx1_MPP - c2o1 * vx1_PMM - c2o1 * vx1_PMP + c2o1 * vx1_MMM + c2o1 * vx1_MMP - c2o1 * vx3_PPM + - // c2o1 * vx3_PPP - c2o1 * vx3_MPM + c2o1 * vx3_MPP + c2o1 * vx3_PMM - c2o1 * vx3_PMP + c2o1 * vx3_MMM - - // c2o1 * vx3_MMP) / - // c16o1; b_020 = c1o16 * ( c2o1 * ( ((kxxMyyFromfcNEQ_MMM - kxxMyyFromfcNEQ_PPP) + (kxxMyyFromfcNEQ_MMP - kxxMyyFromfcNEQ_PPM)) + @@ -440,52 +357,23 @@ __global__ void scaleFC_compressible( ((vx3_PPP + vx3_MMM) - (vx3_PPM + vx3_MMP)) + ((vx3_MPP + vx3_PMM) - (vx3_MPM + vx3_PMP))) + ((kxxMzzFromfcNEQ_PPP - kxxMzzFromfcNEQ_MMM) + (kxxMzzFromfcNEQ_PPM - kxxMzzFromfcNEQ_MMP)) + ((kxxMzzFromfcNEQ_MPP - kxxMzzFromfcNEQ_PMM) + (kxxMzzFromfcNEQ_MPM - kxxMzzFromfcNEQ_PMP))); - - //c_020 = (kyzFromfcNEQ_PPM 
+ kyzFromfcNEQ_PPP + kyzFromfcNEQ_MPM + kyzFromfcNEQ_MPP - kyzFromfcNEQ_PMM - - // kyzFromfcNEQ_PMP - kyzFromfcNEQ_MMM - kyzFromfcNEQ_MMP + c2o1 * vx2_PPM - c2o1 * vx2_PPP + c2o1 * vx2_MPM - - // c2o1 * vx2_MPP - c2o1 * vx2_PMM + c2o1 * vx2_PMP - c2o1 * vx2_MMM + c2o1 * vx2_MMP) / - // c8o1; c_020 = c1o8 * ( c2o1 * (((vx2_MMP + vx2_PPM) - (vx2_PPP + vx2_MMM)) + ((vx2_PMP + vx2_MPM) - (vx2_MPP + vx2_PMM))) + ((kyzFromfcNEQ_PPP - kyzFromfcNEQ_MMM) + (kyzFromfcNEQ_PPM - kyzFromfcNEQ_MMP)) + ((kyzFromfcNEQ_MPP - kyzFromfcNEQ_PMM) + (kyzFromfcNEQ_MPM - kyzFromfcNEQ_PMP))); - //a_001 = (-vx1_PPM + vx1_PPP - vx1_MPM + vx1_MPP - vx1_PMM + vx1_PMP - vx1_MMM + vx1_MMP) / c4o1; a_001 = c1o4 * (((vx1_PPP - vx1_MMM) + (vx1_MMP - vx1_PPM)) + ((vx1_MPP - vx1_PMM) + (vx1_PMP - vx1_MPM))); - - //b_001 = (-vx2_PPM + vx2_PPP - vx2_MPM + vx2_MPP - vx2_PMM + vx2_PMP - vx2_MMM + vx2_MMP) / c4o1; b_001 = c1o4 * (((vx2_PPP - vx2_MMM) + (vx2_MMP - vx2_PPM)) + ((vx2_MPP - vx2_PMM) + (vx2_PMP - vx2_MPM))); - - //c_001 = (-vx3_PPM + vx3_PPP - vx3_MPM + vx3_MPP - vx3_PMM + vx3_PMP - vx3_MMM + vx3_MMP) / c4o1; c_001 = c1o4 * (((vx3_PPP - vx3_MMM) + (vx3_MMP - vx3_PPM)) + ((vx3_MPP - vx3_PMM) + (vx3_PMP - vx3_MPM))); - //a_002 = (-kxzFromfcNEQ_PPM + kxzFromfcNEQ_PPP - kxzFromfcNEQ_MPM + kxzFromfcNEQ_MPP - kxzFromfcNEQ_PMM + - // kxzFromfcNEQ_PMP - kxzFromfcNEQ_MMM + kxzFromfcNEQ_MMP + c2o1 * vx3_PPM - c2o1 * vx3_PPP - - // c2o1 * vx3_MPM + c2o1 * vx3_MPP + c2o1 * vx3_PMM - c2o1 * vx3_PMP - c2o1 * vx3_MMM + c2o1 * vx3_MMP) / - // c8o1; a_002 = c1o8 * ( c2o1 * (((vx3_PPM + vx3_MMP) - (vx3_PPP + vx3_MMM)) + ((vx3_MPP + vx3_PMM) - (vx3_PMP + vx3_MPM))) + ((kxzFromfcNEQ_PPP - kxzFromfcNEQ_MMM) + (kxzFromfcNEQ_MMP - kxzFromfcNEQ_PPM)) + ((kxzFromfcNEQ_PMP - kxzFromfcNEQ_MPM) + (kxzFromfcNEQ_MPP - kxzFromfcNEQ_PMM))); - - //b_002 = (-kyzFromfcNEQ_PPM + kyzFromfcNEQ_PPP - kyzFromfcNEQ_MPM + kyzFromfcNEQ_MPP - kyzFromfcNEQ_PMM + - // kyzFromfcNEQ_PMP - kyzFromfcNEQ_MMM + kyzFromfcNEQ_MMP + c2o1 * vx3_PPM 
- c2o1 * vx3_PPP + c2o1 * vx3_MPM - - // c2o1 * vx3_MPP - c2o1 * vx3_PMM + c2o1 * vx3_PMP - c2o1 * vx3_MMM + c2o1 * vx3_MMP) / - // c8o1; b_002 = c1o8 * ( c2o1 * (((vx3_PPM + vx3_MMP) - (vx3_PPP + vx3_MMM)) + ((vx3_MPM + vx3_PMP) - (vx3_PMM + vx3_MPP))) + ((kyzFromfcNEQ_PPP - kyzFromfcNEQ_MMM) + (kyzFromfcNEQ_MMP - kyzFromfcNEQ_PPM)) + ((kyzFromfcNEQ_PMP - kyzFromfcNEQ_MPM) + (kyzFromfcNEQ_MPP - kyzFromfcNEQ_PMM))); - - //c_002 = (-kxxMyyFromfcNEQ_PPM + kxxMyyFromfcNEQ_PPP - kxxMyyFromfcNEQ_MPM + kxxMyyFromfcNEQ_MPP - - // kxxMyyFromfcNEQ_PMM + kxxMyyFromfcNEQ_PMP - kxxMyyFromfcNEQ_MMM + kxxMyyFromfcNEQ_MMP + - // c2o1 * kxxMzzFromfcNEQ_PPM - c2o1 * kxxMzzFromfcNEQ_PPP + c2o1 * kxxMzzFromfcNEQ_MPM - - // c2o1 * kxxMzzFromfcNEQ_MPP + c2o1 * kxxMzzFromfcNEQ_PMM - c2o1 * kxxMzzFromfcNEQ_PMP + - // c2o1 * kxxMzzFromfcNEQ_MMM - c2o1 * kxxMzzFromfcNEQ_MMP - c2o1 * vx1_PPM + c2o1 * vx1_PPP + - // c2o1 * vx1_MPM - c2o1 * vx1_MPP - c2o1 * vx1_PMM + c2o1 * vx1_PMP + c2o1 * vx1_MMM - c2o1 * vx1_MMP - - // c2o1 * vx2_PPM + c2o1 * vx2_PPP - c2o1 * vx2_MPM + c2o1 * vx2_MPP + c2o1 * vx2_PMM - c2o1 * vx2_PMP + - // c2o1 * vx2_MMM - c2o1 * vx2_MMP) / - // c16o1; c_002 = c1o16 * ( c2o1 * ( ((kxxMzzFromfcNEQ_MMM - kxxMzzFromfcNEQ_PPP) + (kxxMzzFromfcNEQ_PPM - kxxMzzFromfcNEQ_MMP)) + @@ -495,23 +383,14 @@ __global__ void scaleFC_compressible( ((kxxMyyFromfcNEQ_PPP - kxxMyyFromfcNEQ_MMM) + (kxxMyyFromfcNEQ_MMP - kxxMyyFromfcNEQ_PPM)) + ((kxxMyyFromfcNEQ_PMP - kxxMyyFromfcNEQ_MPM) + (kxxMyyFromfcNEQ_MPP - kxxMyyFromfcNEQ_PMM))); - //a_110 = (vx1_PPM + vx1_PPP - vx1_MPM - vx1_MPP - vx1_PMM - vx1_PMP + vx1_MMM + vx1_MMP) / c2o1; - //b_110 = (vx2_PPM + vx2_PPP - vx2_MPM - vx2_MPP - vx2_PMM - vx2_PMP + vx2_MMM + vx2_MMP) / c2o1; - //c_110 = (vx3_PPM + vx3_PPP - vx3_MPM - vx3_MPP - vx3_PMM - vx3_PMP + vx3_MMM + vx3_MMP) / c2o1; a_110 = c1o2 * (((vx1_PPP + vx1_MMM) + (vx1_MMP + vx1_PPM)) - ((vx1_MPM + vx1_PMP) + (vx1_PMM + vx1_MPP))); b_110 = c1o2 * (((vx2_PPP + vx2_MMM) + (vx2_MMP + 
vx2_PPM)) - ((vx2_MPM + vx2_PMP) + (vx2_PMM + vx2_MPP))); c_110 = c1o2 * (((vx3_PPP + vx3_MMM) + (vx3_MMP + vx3_PPM)) - ((vx3_MPM + vx3_PMP) + (vx3_PMM + vx3_MPP))); - //a_101 = (-vx1_PPM + vx1_PPP + vx1_MPM - vx1_MPP - vx1_PMM + vx1_PMP + vx1_MMM - vx1_MMP) / c2o1; - //b_101 = (-vx2_PPM + vx2_PPP + vx2_MPM - vx2_MPP - vx2_PMM + vx2_PMP + vx2_MMM - vx2_MMP) / c2o1; - //c_101 = (-vx3_PPM + vx3_PPP + vx3_MPM - vx3_MPP - vx3_PMM + vx3_PMP + vx3_MMM - vx3_MMP) / c2o1; a_101 = c1o2 * (((vx1_PPP + vx1_MMM) - (vx1_MMP + vx1_PPM)) + ((vx1_MPM + vx1_PMP) - (vx1_PMM + vx1_MPP))); b_101 = c1o2 * (((vx2_PPP + vx2_MMM) - (vx2_MMP + vx2_PPM)) + ((vx2_MPM + vx2_PMP) - (vx2_PMM + vx2_MPP))); c_101 = c1o2 * (((vx3_PPP + vx3_MMM) - (vx3_MMP + vx3_PPM)) + ((vx3_MPM + vx3_PMP) - (vx3_PMM + vx3_MPP))); - //a_011 = (-vx1_PPM + vx1_PPP - vx1_MPM + vx1_MPP + vx1_PMM - vx1_PMP + vx1_MMM - vx1_MMP) / c2o1; - //b_011 = (-vx2_PPM + vx2_PPP - vx2_MPM + vx2_MPP + vx2_PMM - vx2_PMP + vx2_MMM - vx2_MMP) / c2o1; - //c_011 = (-vx3_PPM + vx3_PPP - vx3_MPM + vx3_MPP + vx3_PMM - vx3_PMP + vx3_MMM - vx3_MMP) / c2o1; a_011 = c1o2 * (((vx1_PPP + vx1_MMM) - (vx1_MMP + vx1_PPM)) + ((vx1_PMM + vx1_MPP) - (vx1_MPM + vx1_PMP))); b_011 = c1o2 * (((vx2_PPP + vx2_MMM) - (vx2_MMP + vx2_PPM)) + ((vx2_PMM + vx2_MPP) - (vx2_MPM + vx2_PMP))); c_011 = c1o2 * (((vx3_PPP + vx3_MMM) - (vx3_MMP + vx3_PPM)) + ((vx3_PMM + vx3_MPP) - (vx3_MPM + vx3_PMP))); @@ -527,9 +406,9 @@ __global__ void scaleFC_compressible( //////////////////////////////////////////////////////////////////////////////// //! - Set the relative position of the offset cell {-1, 0, 1} //! - real xoff = offsetFC.xOffFC[k_thread]; - real yoff = offsetFC.yOffFC[k_thread]; - real zoff = offsetFC.zOffFC[k_thread]; + real xoff = offsetFC.xOffFC[nodeIndex]; + real yoff = offsetFC.yOffFC[nodeIndex]; + real zoff = offsetFC.zOffFC[nodeIndex]; real xoff_sq = xoff * xoff; real yoff_sq = yoff * yoff; @@ -540,27 +419,13 @@ __global__ void scaleFC_compressible( //! 
real LaplaceRho = ((xoff != c0o1) || (yoff != c0o1) || (zoff != c0o1)) - ? c0o1 : c0o1; -// : -c3o1 * (a_100 * a_100 + b_010 * b_010 + c_001 * c_001) - c6o1 * (b_100 * a_010 + c_100 * a_001 + c_010 * b_001); - // d_000 = ( drho_PPM + drho_PPP + drho_MPM + drho_MPP + drho_PMM + drho_PMP + drho_MMM + drho_MMP - c2o1 * LaplaceRho) * c1o8; + ? c0o1 : -c3o1 * (a_100 * a_100 + b_010 * b_010 + c_001 * c_001) - c6o1 * (b_100 * a_010 + c_100 * a_001 + c_010 * b_001); d_000 = c1o8 * ((((drho_PPP + drho_MMM) + (drho_PPM + drho_MMP)) + ((drho_PMM + drho_MPP) + (drho_PMP + drho_MPM))) - c2o1 * LaplaceRho); - - // d_100 = ( drho_PPM + drho_PPP - drho_MPM - drho_MPP + drho_PMM + drho_PMP - drho_MMM - drho_MMP) * c1o4; d_100 = c1o4 * (((drho_PPP - drho_MMM) + (drho_PPM - drho_MMP)) + ((drho_PMM - drho_MPP) + (drho_PMP - drho_MPM))); - - // d_010 = ( drho_PPM + drho_PPP + drho_MPM + drho_MPP - drho_PMM - drho_PMP - drho_MMM - drho_MMP) * c1o4; d_010 = c1o4 * (((drho_PPP - drho_MMM) + (drho_PPM - drho_MMP)) + ((drho_MPP - drho_PMM) + (drho_MPM - drho_PMP))); - - // d_001 = (-drho_PPM + drho_PPP - drho_MPM + drho_MPP - drho_PMM + drho_PMP - drho_MMM + drho_MMP) * c1o4; d_001 = c1o4 * (((drho_PPP - drho_MMM) + (drho_MMP - drho_PPM)) + ((drho_MPP - drho_PMM) + (drho_PMP - drho_MPM))); - - // d_110 = ( drho_PPM + drho_PPP - drho_MPM - drho_MPP - drho_PMM - drho_PMP + drho_MMM + drho_MMP) * c1o2; d_110 = c1o2 * (((drho_PPP + drho_MMM) + (drho_PPM + drho_MMP)) - ((drho_PMM + drho_MPP) + (drho_PMP + drho_MPM))); - - // d_101 = (-drho_PPM + drho_PPP + drho_MPM - drho_MPP - drho_PMM + drho_PMP + drho_MMM - drho_MMP) * c1o2; d_101 = c1o2 * (((drho_PPP + drho_MMM) - (drho_PPM + drho_MMP)) + ((drho_PMP + drho_MPM) - (drho_PMM + drho_MPP))); - - // d_011 = (-drho_PPM + drho_PPP - drho_MPM + drho_MPP + drho_PMM - drho_PMP + drho_MMM - drho_MMP) * c1o2; d_011 = c1o2 * (((drho_PPP + drho_MMM) - (drho_PPM + drho_MMP)) + ((drho_PMM + drho_MPP) - (drho_PMP + drho_MPM))); @@ -780,7 +645,7 @@ __global__ 
void scaleFC_compressible( //////////////////////////////////////////////////////////////////////////////////// // index of the destination node and its neighbors - k_000 = indicesCoarse000[k_thread]; + k_000 = indicesCoarse000[nodeIndex]; k_M00 = neighborXcoarse [k_000]; k_0M0 = neighborYcoarse [k_000]; k_00M = neighborZcoarse [k_000]; diff --git a/src/gpu/VirtualFluids_GPU/GPU/LBMKernel.cu b/src/gpu/VirtualFluids_GPU/GPU/LBMKernel.cu index 75d5e2637fc03f6fa963dd8cc7c84a6523b5e75d..4faea21102b6a68dd9a0aa30e9cecc7eba6051b0 100644 --- a/src/gpu/VirtualFluids_GPU/GPU/LBMKernel.cu +++ b/src/gpu/VirtualFluids_GPU/GPU/LBMKernel.cu @@ -2657,12 +2657,11 @@ void BBSlipDevComp27(LBMSimulationParameter* parameterDevice, QforBoundaryCondit dim3 grid = vf::cuda::getCudaGrid( parameterDevice->numberofthreads, boundaryCondition->numberOfBCnodes); dim3 threads(parameterDevice->numberofthreads, 1, 1 ); - QSlipDeviceComp27<<< grid, threads >>> ( + BBSlipDeviceComp27<<< grid, threads >>> ( parameterDevice->distributions.f[0], boundaryCondition->k, boundaryCondition->q27[0], boundaryCondition->numberOfBCnodes, - parameterDevice->omega, parameterDevice->neighborX, parameterDevice->neighborY, parameterDevice->neighborZ, diff --git a/src/gpu/VirtualFluids_GPU/GPU/NoSlipBCs27.cu b/src/gpu/VirtualFluids_GPU/GPU/NoSlipBCs27.cu index 66bf6dee234e584b734a9ef7a4d191e8ac7ff6a1..79dedee58afb7b11c4c3ede9911f54df65cf859f 100644 --- a/src/gpu/VirtualFluids_GPU/GPU/NoSlipBCs27.cu +++ b/src/gpu/VirtualFluids_GPU/GPU/NoSlipBCs27.cu @@ -1,31 +1,56 @@ -// _ ___ __ __________ _ __ ______________ __ -// | | / (_)____/ /___ ______ _/ / ____/ /_ __(_)___/ /____ / ___/ __ / / / / -// | | / / / ___/ __/ / / / __ `/ / /_ / / / / / / __ / ___/ / /___/ /_/ / / / / -// | |/ / / / / /_/ /_/ / /_/ / / __/ / / /_/ / / /_/ (__ ) / /_) / ____/ /__/ / -// |___/_/_/ \__/\__,_/\__,_/_/_/ /_/\__,_/_/\__,_/____/ \____/_/ \_____/ +//======================================================================================= +// 
____ ____ __ ______ __________ __ __ __ __ +// \ \ | | | | | _ \ |___ ___| | | | | / \ | | +// \ \ | | | | | |_) | | | | | | | / \ | | +// \ \ | | | | | _ / | | | | | | / /\ \ | | +// \ \ | | | | | | \ \ | | | \__/ | / ____ \ | |____ +// \ \ | | |__| |__| \__\ |__| \________/ /__/ \__\ |_______| +// \ \ | | ________________________________________________________________ +// \ \ | | | ______________________________________________________________| +// \ \| | | | __ __ __ __ ______ _______ +// \ | | |_____ | | | | | | | | | _ \ / _____) +// \ | | _____| | | | | | | | | | | \ \ \_______ +// \ | | | | |_____ | \_/ | | | | |_/ / _____ | +// \ _____| |__| |________| \_______/ |__| |______/ (_______/ // -////////////////////////////////////////////////////////////////////////// -/* Device code */ +// This file is part of VirtualFluids. VirtualFluids is free software: you can +// redistribute it and/or modify it under the terms of the GNU General Public +// License as published by the Free Software Foundation, either version 3 of +// the License, or (at your option) any later version. +// +// VirtualFluids is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// for more details. +// +// You should have received a copy of the GNU General Public License along +// with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>. +// +//! \file NoSlipBCs27.cu +//! \ingroup GPU +//! 
\author Martin Schoenherr, Anna Wellmann +//====================================================================================== #include "LBM/LB.h" #include "lbm/constants/D3Q27.h" #include <lbm/constants/NumericConstants.h> -#include "KernelUtilities.h" +#include "LBM/GPUHelperFunctions/KernelUtilities.h" using namespace vf::lbm::constant; using namespace vf::lbm::dir; +using namespace vf::gpu; ////////////////////////////////////////////////////////////////////////////// __global__ void QDevice3rdMomentsComp27( - real* distributions, - int* subgridDistanceIndices, - real* subgridDistances, - unsigned int numberOfBCnodes, - real omega, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - unsigned long long numberOfLBnodes, - bool isEvenTimestep) + real* distributions, + int* subgridDistanceIndices, + real* subgridDistances, + unsigned int numberOfBCnodes, + real omega, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + unsigned long long numberOfLBnodes, + bool isEvenTimestep) { Distributions27 D; if (isEvenTimestep==true) @@ -559,16 +584,17 @@ __global__ void QDevice3rdMomentsComp27( ////////////////////////////////////////////////////////////////////////////// -__global__ void QDeviceIncompHighNu27(real* DD, - int* k_Q, - real* QQ, - unsigned int numberOfBCnodes, - real om1, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - unsigned long long numberOfLBnodes, - bool isEvenTimestep) +__global__ void QDeviceIncompHighNu27( + real* DD, + int* k_Q, + real* QQ, + unsigned int numberOfBCnodes, + real om1, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + unsigned long long numberOfLBnodes, + bool isEvenTimestep) { Distributions27 D; if (isEvenTimestep==true) @@ -1055,16 +1081,16 @@ __global__ void QDeviceIncompHighNu27(real* DD, ////////////////////////////////////////////////////////////////////////////// __global__ void 
QDeviceCompHighNu27( - real* DD, - int* k_Q, - real* QQ, - unsigned int numberOfBCnodes, - real om1, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - unsigned long long numberOfLBnodes, - bool isEvenTimestep) + real* DD, + int* k_Q, + real* QQ, + unsigned int numberOfBCnodes, + real om1, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + unsigned long long numberOfLBnodes, + bool isEvenTimestep) { Distributions27 D; if (isEvenTimestep==true) @@ -1629,16 +1655,16 @@ __global__ void QDeviceCompHighNu27( ////////////////////////////////////////////////////////////////////////////// __global__ void QDeviceComp27( - real* distributions, - int* subgridDistanceIndices, - real* subgridDistances, - unsigned int numberOfBCnodes, - real omega, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - unsigned long long numberOfLBnodes, - bool isEvenTimestep) + real* distributions, + int* subgridDistanceIndices, + real* subgridDistances, + unsigned int numberOfBCnodes, + real omega, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + unsigned long long numberOfLBnodes, + bool isEvenTimestep) { ////////////////////////////////////////////////////////////////////////// //! The no-slip boundary condition is executed in the following steps @@ -1646,16 +1672,9 @@ __global__ void QDeviceComp27( //////////////////////////////////////////////////////////////////////////////// //! - Get node index coordinates from threadIdx, blockIdx, blockDim and gridDim. //! 
- const unsigned x = threadIdx.x; // global x-index - const unsigned y = blockIdx.x; // global y-index - const unsigned z = blockIdx.y; // global z-index - - const unsigned nx = blockDim.x; - const unsigned ny = gridDim.x; + const unsigned nodeIndex = getNodeIndex(); - const unsigned k = nx*(ny*z + y) + x; - - if(k < numberOfBCnodes) + if(nodeIndex < numberOfBCnodes) { ////////////////////////////////////////////////////////////////////////// //! - Read distributions: style of reading and writing the distributions from/to stored arrays dependent on timestep is based on the esoteric twist algorithm \ref @@ -1673,7 +1692,7 @@ __global__ void QDeviceComp27( //////////////////////////////////////////////////////////////////////////////// //! - Set neighbor indices (necessary for indirect addressing) //! - unsigned int indexOfBCnode = subgridDistanceIndices[k]; + unsigned int indexOfBCnode = subgridDistanceIndices[nodeIndex]; unsigned int kzero= indexOfBCnode; unsigned int ke = indexOfBCnode; unsigned int kw = neighborX[indexOfBCnode]; @@ -1761,7 +1780,7 @@ __global__ void QDeviceComp27( //////////////////////////////////////////////////////////////////////////////// //! 
- Update distributions with subgrid distance (q) between zero and one real feq, q, velocityLB; - q = (subgridD.q[DIR_P00])[k]; + q = (subgridD.q[DIR_P00])[nodeIndex]; if (q>=c0o1 && q<=c1o1) // only update distribution for q between zero and one { velocityLB = vx1; @@ -1769,7 +1788,7 @@ __global__ void QDeviceComp27( (dist.f[DIR_M00])[kw] = getInterpolatedDistributionForNoSlipBC(q, f_E, f_W, feq, omega); } - q = (subgridD.q[DIR_M00])[k]; + q = (subgridD.q[DIR_M00])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { velocityLB = -vx1; @@ -1777,7 +1796,7 @@ __global__ void QDeviceComp27( (dist.f[DIR_P00])[ke] = getInterpolatedDistributionForNoSlipBC(q, f_W, f_E, feq, omega); } - q = (subgridD.q[DIR_0P0])[k]; + q = (subgridD.q[DIR_0P0])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { velocityLB = vx2; @@ -1785,7 +1804,7 @@ __global__ void QDeviceComp27( (dist.f[DIR_0M0])[ks] = getInterpolatedDistributionForNoSlipBC(q, f_N, f_S, feq, omega); } - q = (subgridD.q[DIR_0M0])[k]; + q = (subgridD.q[DIR_0M0])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { velocityLB = -vx2; @@ -1793,7 +1812,7 @@ __global__ void QDeviceComp27( (dist.f[DIR_0P0])[kn] = getInterpolatedDistributionForNoSlipBC(q, f_S, f_N, feq, omega); } - q = (subgridD.q[DIR_00P])[k]; + q = (subgridD.q[DIR_00P])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { velocityLB = vx3; @@ -1801,7 +1820,7 @@ __global__ void QDeviceComp27( (dist.f[DIR_00M])[kb] = getInterpolatedDistributionForNoSlipBC(q, f_T, f_B, feq, omega); } - q = (subgridD.q[DIR_00M])[k]; + q = (subgridD.q[DIR_00M])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { velocityLB = -vx3; @@ -1809,7 +1828,7 @@ __global__ void QDeviceComp27( (dist.f[DIR_00P])[kt] = getInterpolatedDistributionForNoSlipBC(q, f_B, f_T, feq, omega); } - q = (subgridD.q[DIR_PP0])[k]; + q = (subgridD.q[DIR_PP0])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { velocityLB = vx1 + vx2; @@ -1817,7 +1836,7 @@ __global__ void QDeviceComp27( (dist.f[DIR_MM0])[ksw] = getInterpolatedDistributionForNoSlipBC(q, f_NE, f_SW, feq, omega); } - q = 
(subgridD.q[DIR_MM0])[k]; + q = (subgridD.q[DIR_MM0])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { velocityLB = -vx1 - vx2; @@ -1825,7 +1844,7 @@ __global__ void QDeviceComp27( (dist.f[DIR_PP0])[kne] = getInterpolatedDistributionForNoSlipBC(q, f_SW, f_NE, feq, omega); } - q = (subgridD.q[DIR_PM0])[k]; + q = (subgridD.q[DIR_PM0])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { velocityLB = vx1 - vx2; @@ -1833,7 +1852,7 @@ __global__ void QDeviceComp27( (dist.f[DIR_MP0])[knw] = getInterpolatedDistributionForNoSlipBC(q, f_SE, f_NW, feq, omega); } - q = (subgridD.q[DIR_MP0])[k]; + q = (subgridD.q[DIR_MP0])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { velocityLB = -vx1 + vx2; @@ -1841,7 +1860,7 @@ __global__ void QDeviceComp27( (dist.f[DIR_PM0])[kse] = getInterpolatedDistributionForNoSlipBC(q, f_NW, f_SE, feq, omega); } - q = (subgridD.q[DIR_P0P])[k]; + q = (subgridD.q[DIR_P0P])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { velocityLB = vx1 + vx3; @@ -1849,7 +1868,7 @@ __global__ void QDeviceComp27( (dist.f[DIR_M0M])[kbw] = getInterpolatedDistributionForNoSlipBC(q, f_TE, f_BW, feq, omega); } - q = (subgridD.q[DIR_M0M])[k]; + q = (subgridD.q[DIR_M0M])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { velocityLB = -vx1 - vx3; @@ -1857,7 +1876,7 @@ __global__ void QDeviceComp27( (dist.f[DIR_P0P])[kte] = getInterpolatedDistributionForNoSlipBC(q, f_BW, f_TE, feq, omega); } - q = (subgridD.q[DIR_P0M])[k]; + q = (subgridD.q[DIR_P0M])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { velocityLB = vx1 - vx3; @@ -1865,7 +1884,7 @@ __global__ void QDeviceComp27( (dist.f[DIR_M0P])[ktw] = getInterpolatedDistributionForNoSlipBC(q, f_BE, f_TW, feq, omega); } - q = (subgridD.q[DIR_M0P])[k]; + q = (subgridD.q[DIR_M0P])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { velocityLB = -vx1 + vx3; @@ -1873,7 +1892,7 @@ __global__ void QDeviceComp27( (dist.f[DIR_P0M])[kbe] = getInterpolatedDistributionForNoSlipBC(q, f_TW, f_BE, feq, omega); } - q = (subgridD.q[DIR_0PP])[k]; + q = (subgridD.q[DIR_0PP])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { velocityLB = vx2 + 
vx3; @@ -1881,7 +1900,7 @@ __global__ void QDeviceComp27( (dist.f[DIR_0MM])[kbs] = getInterpolatedDistributionForNoSlipBC(q, f_TN, f_BS, feq, omega); } - q = (subgridD.q[DIR_0MM])[k]; + q = (subgridD.q[DIR_0MM])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { velocityLB = -vx2 - vx3; @@ -1889,7 +1908,7 @@ __global__ void QDeviceComp27( (dist.f[DIR_0PP])[ktn] = getInterpolatedDistributionForNoSlipBC(q, f_BS, f_TN, feq, omega); } - q = (subgridD.q[DIR_0PM])[k]; + q = (subgridD.q[DIR_0PM])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { velocityLB = vx2 - vx3; @@ -1897,7 +1916,7 @@ __global__ void QDeviceComp27( (dist.f[DIR_0MP])[kts] = getInterpolatedDistributionForNoSlipBC(q, f_BN, f_TS, feq, omega); } - q = (subgridD.q[DIR_0MP])[k]; + q = (subgridD.q[DIR_0MP])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { velocityLB = -vx2 + vx3; @@ -1905,7 +1924,7 @@ __global__ void QDeviceComp27( (dist.f[DIR_0PM])[kbn] = getInterpolatedDistributionForNoSlipBC(q, f_TS, f_BN, feq, omega); } - q = (subgridD.q[DIR_PPP])[k]; + q = (subgridD.q[DIR_PPP])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { velocityLB = vx1 + vx2 + vx3; @@ -1913,7 +1932,7 @@ __global__ void QDeviceComp27( (dist.f[DIR_MMM])[kbsw] = getInterpolatedDistributionForNoSlipBC(q, f_TNE, f_BSW, feq, omega); } - q = (subgridD.q[DIR_MMM])[k]; + q = (subgridD.q[DIR_MMM])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { velocityLB = -vx1 - vx2 - vx3; @@ -1921,7 +1940,7 @@ __global__ void QDeviceComp27( (dist.f[DIR_PPP])[ktne] = getInterpolatedDistributionForNoSlipBC(q, f_BSW, f_TNE, feq, omega); } - q = (subgridD.q[DIR_PPM])[k]; + q = (subgridD.q[DIR_PPM])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { velocityLB = vx1 + vx2 - vx3; @@ -1929,7 +1948,7 @@ __global__ void QDeviceComp27( (dist.f[DIR_MMP])[ktsw] = getInterpolatedDistributionForNoSlipBC(q, f_BNE, f_TSW, feq, omega); } - q = (subgridD.q[DIR_MMP])[k]; + q = (subgridD.q[DIR_MMP])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { velocityLB = -vx1 - vx2 + vx3; @@ -1937,7 +1956,7 @@ __global__ void QDeviceComp27( 
(dist.f[DIR_PPM])[kbne] = getInterpolatedDistributionForNoSlipBC(q, f_TSW, f_BNE, feq, omega); } - q = (subgridD.q[DIR_PMP])[k]; + q = (subgridD.q[DIR_PMP])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { velocityLB = vx1 - vx2 + vx3; @@ -1945,7 +1964,7 @@ __global__ void QDeviceComp27( (dist.f[DIR_MPM])[kbnw] = getInterpolatedDistributionForNoSlipBC(q, f_TSE, f_BNW, feq, omega); } - q = (subgridD.q[DIR_MPM])[k]; + q = (subgridD.q[DIR_MPM])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { velocityLB = -vx1 + vx2 - vx3; @@ -1953,7 +1972,7 @@ __global__ void QDeviceComp27( (dist.f[DIR_PMP])[ktse] = getInterpolatedDistributionForNoSlipBC(q, f_BNW, f_TSE, feq, omega); } - q = (subgridD.q[DIR_PMM])[k]; + q = (subgridD.q[DIR_PMM])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { velocityLB = vx1 - vx2 - vx3; @@ -1961,7 +1980,7 @@ __global__ void QDeviceComp27( (dist.f[DIR_MPP])[ktnw] = getInterpolatedDistributionForNoSlipBC(q, f_BSE, f_TNW, feq, omega); } - q = (subgridD.q[DIR_MPP])[k]; + q = (subgridD.q[DIR_MPP])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { velocityLB = -vx1 + vx2 + vx3; @@ -2011,16 +2030,17 @@ __global__ void QDeviceComp27( ////////////////////////////////////////////////////////////////////////////// -__global__ void QDevice27(real* distributions, - int* subgridDistanceIndices, - real* subgridDistances, - unsigned int numberOfBCnodes, - real omega, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - unsigned long long numberOfLBnodes, - bool isEvenTimestep) +__global__ void QDevice27( + real* distributions, + int* subgridDistanceIndices, + real* subgridDistances, + unsigned int numberOfBCnodes, + real omega, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + unsigned long long numberOfLBnodes, + bool isEvenTimestep) { ////////////////////////////////////////////////////////////////////////// //! 
The no-slip boundary condition is executed in the following steps @@ -2028,19 +2048,12 @@ __global__ void QDevice27(real* distributions, //////////////////////////////////////////////////////////////////////////////// //! - Get node index coordinates from threadIdx, blockIdx, blockDim and gridDim. //! - const unsigned x = threadIdx.x; // global x-index - const unsigned y = blockIdx.x; // global y-index - const unsigned z = blockIdx.y; // global z-index - - const unsigned nx = blockDim.x; - const unsigned ny = gridDim.x; - - const unsigned k = nx*(ny*z + y) + x; + const unsigned nodeIndex = getNodeIndex(); ////////////////////////////////////////////////////////////////////////// //! - Run for all indices in size of boundary condition (numberOfBCnodes) //! - if(k < numberOfBCnodes) + if(nodeIndex < numberOfBCnodes) { ////////////////////////////////////////////////////////////////////////// @@ -2059,7 +2072,7 @@ __global__ void QDevice27(real* distributions, //////////////////////////////////////////////////////////////////////////////// //! - Set neighbor indices (necessary for indirect addressing) //! - unsigned int indexOfBCnode = subgridDistanceIndices[k]; + unsigned int indexOfBCnode = subgridDistanceIndices[nodeIndex]; unsigned int kzero= indexOfBCnode; unsigned int ke = indexOfBCnode; unsigned int kw = neighborX[indexOfBCnode]; @@ -2148,7 +2161,7 @@ __global__ void QDevice27(real* distributions, //! - Update distributions with subgrid distance (q) between zero and one //! 
real feq, q, velocityLB; - q = (subgridD.q[DIR_P00])[k]; + q = (subgridD.q[DIR_P00])[nodeIndex]; if (q>=c0o1 && q<=c1o1) // only update distribution for q between zero and one { velocityLB = vx1; @@ -2156,7 +2169,7 @@ __global__ void QDevice27(real* distributions, (dist.f[DIR_M00])[kw] = getInterpolatedDistributionForNoSlipBC(q, f_E, f_W, feq, omega); } - q = (subgridD.q[DIR_M00])[k]; + q = (subgridD.q[DIR_M00])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { velocityLB = -vx1; @@ -2164,7 +2177,7 @@ __global__ void QDevice27(real* distributions, (dist.f[DIR_P00])[ke] = getInterpolatedDistributionForNoSlipBC(q, f_W, f_E, feq, omega); } - q = (subgridD.q[DIR_0P0])[k]; + q = (subgridD.q[DIR_0P0])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { velocityLB = vx2; @@ -2172,7 +2185,7 @@ __global__ void QDevice27(real* distributions, (dist.f[DIR_0M0])[ks] = getInterpolatedDistributionForNoSlipBC(q, f_N, f_S, feq, omega); } - q = (subgridD.q[DIR_0M0])[k]; + q = (subgridD.q[DIR_0M0])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { velocityLB = -vx2; @@ -2180,7 +2193,7 @@ __global__ void QDevice27(real* distributions, (dist.f[DIR_0P0])[kn] = getInterpolatedDistributionForNoSlipBC(q, f_S, f_N, feq, omega); } - q = (subgridD.q[DIR_00P])[k]; + q = (subgridD.q[DIR_00P])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { velocityLB = vx3; @@ -2188,7 +2201,7 @@ __global__ void QDevice27(real* distributions, (dist.f[DIR_00M])[kb] = getInterpolatedDistributionForNoSlipBC(q, f_T, f_B, feq, omega); } - q = (subgridD.q[DIR_00M])[k]; + q = (subgridD.q[DIR_00M])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { velocityLB = -vx3; @@ -2196,7 +2209,7 @@ __global__ void QDevice27(real* distributions, (dist.f[DIR_00P])[kt] = getInterpolatedDistributionForNoSlipBC(q, f_B, f_T, feq, omega); } - q = (subgridD.q[DIR_PP0])[k]; + q = (subgridD.q[DIR_PP0])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { velocityLB = vx1 + vx2; @@ -2204,7 +2217,7 @@ __global__ void QDevice27(real* distributions, (dist.f[DIR_MM0])[ksw] = getInterpolatedDistributionForNoSlipBC(q, 
f_NE, f_SW, feq, omega); } - q = (subgridD.q[DIR_MM0])[k]; + q = (subgridD.q[DIR_MM0])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { velocityLB = -vx1 - vx2; @@ -2212,7 +2225,7 @@ __global__ void QDevice27(real* distributions, (dist.f[DIR_PP0])[kne] = getInterpolatedDistributionForNoSlipBC(q, f_SW, f_NE, feq, omega); } - q = (subgridD.q[DIR_PM0])[k]; + q = (subgridD.q[DIR_PM0])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { velocityLB = vx1 - vx2; @@ -2220,7 +2233,7 @@ __global__ void QDevice27(real* distributions, (dist.f[DIR_MP0])[knw] = getInterpolatedDistributionForNoSlipBC(q, f_SE, f_NW, feq, omega); } - q = (subgridD.q[DIR_MP0])[k]; + q = (subgridD.q[DIR_MP0])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { velocityLB = -vx1 + vx2; @@ -2228,7 +2241,7 @@ __global__ void QDevice27(real* distributions, (dist.f[DIR_PM0])[kse] = getInterpolatedDistributionForNoSlipBC(q, f_NW, f_SE, feq, omega); } - q = (subgridD.q[DIR_P0P])[k]; + q = (subgridD.q[DIR_P0P])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { velocityLB = vx1 + vx3; @@ -2236,7 +2249,7 @@ __global__ void QDevice27(real* distributions, (dist.f[DIR_M0M])[kbw] = getInterpolatedDistributionForNoSlipBC(q, f_TE, f_BW, feq, omega); } - q = (subgridD.q[DIR_M0M])[k]; + q = (subgridD.q[DIR_M0M])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { velocityLB = -vx1 - vx3; @@ -2244,7 +2257,7 @@ __global__ void QDevice27(real* distributions, (dist.f[DIR_P0P])[kte] = getInterpolatedDistributionForNoSlipBC(q, f_BW, f_TE, feq, omega); } - q = (subgridD.q[DIR_P0M])[k]; + q = (subgridD.q[DIR_P0M])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { velocityLB = vx1 - vx3; @@ -2252,7 +2265,7 @@ __global__ void QDevice27(real* distributions, (dist.f[DIR_M0P])[ktw] = getInterpolatedDistributionForNoSlipBC(q, f_BE, f_TW, feq, omega); } - q = (subgridD.q[DIR_M0P])[k]; + q = (subgridD.q[DIR_M0P])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { velocityLB = -vx1 + vx3; @@ -2260,7 +2273,7 @@ __global__ void QDevice27(real* distributions, (dist.f[DIR_P0M])[kbe] = getInterpolatedDistributionForNoSlipBC(q, 
f_TW, f_BE, feq, omega); } - q = (subgridD.q[DIR_0PP])[k]; + q = (subgridD.q[DIR_0PP])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { velocityLB = vx2 + vx3; @@ -2268,7 +2281,7 @@ __global__ void QDevice27(real* distributions, (dist.f[DIR_0MM])[kbs] = getInterpolatedDistributionForNoSlipBC(q, f_TN, f_BS, feq, omega); } - q = (subgridD.q[DIR_0MM])[k]; + q = (subgridD.q[DIR_0MM])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { velocityLB = -vx2 - vx3; @@ -2276,7 +2289,7 @@ __global__ void QDevice27(real* distributions, (dist.f[DIR_0PP])[ktn] = getInterpolatedDistributionForNoSlipBC(q, f_BS, f_TN, feq, omega); } - q = (subgridD.q[DIR_0PM])[k]; + q = (subgridD.q[DIR_0PM])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { velocityLB = vx2 - vx3; @@ -2284,7 +2297,7 @@ __global__ void QDevice27(real* distributions, (dist.f[DIR_0MP])[kts] = getInterpolatedDistributionForNoSlipBC(q, f_BN, f_TS, feq, omega); } - q = (subgridD.q[DIR_0MP])[k]; + q = (subgridD.q[DIR_0MP])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { velocityLB = -vx2 + vx3; @@ -2292,7 +2305,7 @@ __global__ void QDevice27(real* distributions, (dist.f[DIR_0PM])[kbn] = getInterpolatedDistributionForNoSlipBC(q, f_TS, f_BN, feq, omega); } - q = (subgridD.q[DIR_PPP])[k]; + q = (subgridD.q[DIR_PPP])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { velocityLB = vx1 + vx2 + vx3; @@ -2300,7 +2313,7 @@ __global__ void QDevice27(real* distributions, (dist.f[DIR_MMM])[kbsw] = getInterpolatedDistributionForNoSlipBC(q, f_TNE, f_BSW, feq, omega); } - q = (subgridD.q[DIR_MMM])[k]; + q = (subgridD.q[DIR_MMM])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { velocityLB = -vx1 - vx2 - vx3; @@ -2308,7 +2321,7 @@ __global__ void QDevice27(real* distributions, (dist.f[DIR_PPP])[ktne] = getInterpolatedDistributionForNoSlipBC(q, f_BSW, f_TNE, feq, omega); } - q = (subgridD.q[DIR_PPM])[k]; + q = (subgridD.q[DIR_PPM])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { velocityLB = vx1 + vx2 - vx3; @@ -2316,7 +2329,7 @@ __global__ void QDevice27(real* distributions, (dist.f[DIR_MMP])[ktsw] = 
getInterpolatedDistributionForNoSlipBC(q, f_BNE, f_TSW, feq, omega); } - q = (subgridD.q[DIR_MMP])[k]; + q = (subgridD.q[DIR_MMP])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { velocityLB = -vx1 - vx2 + vx3; @@ -2324,7 +2337,7 @@ __global__ void QDevice27(real* distributions, (dist.f[DIR_PPM])[kbne] = getInterpolatedDistributionForNoSlipBC(q, f_TSW, f_BNE, feq, omega); } - q = (subgridD.q[DIR_PMP])[k]; + q = (subgridD.q[DIR_PMP])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { velocityLB = vx1 - vx2 + vx3; @@ -2332,7 +2345,7 @@ __global__ void QDevice27(real* distributions, (dist.f[DIR_MPM])[kbnw] = getInterpolatedDistributionForNoSlipBC(q, f_TSE, f_BNW, feq, omega); } - q = (subgridD.q[DIR_MPM])[k]; + q = (subgridD.q[DIR_MPM])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { velocityLB = -vx1 + vx2 - vx3; @@ -2340,7 +2353,7 @@ __global__ void QDevice27(real* distributions, (dist.f[DIR_PMP])[ktse] = getInterpolatedDistributionForNoSlipBC(q, f_BNW, f_TSE, feq, omega); } - q = (subgridD.q[DIR_PMM])[k]; + q = (subgridD.q[DIR_PMM])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { velocityLB = vx1 - vx2 - vx3; @@ -2348,7 +2361,7 @@ __global__ void QDevice27(real* distributions, (dist.f[DIR_MPP])[ktnw] = getInterpolatedDistributionForNoSlipBC(q, f_BSE, f_TNW, feq, omega); } - q = (subgridD.q[DIR_MPP])[k]; + q = (subgridD.q[DIR_MPP])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { velocityLB = -vx1 + vx2 + vx3; @@ -2398,15 +2411,16 @@ __global__ void QDevice27(real* distributions, ////////////////////////////////////////////////////////////////////////////// -__global__ void BBDevice27(real* distributions, - int* subgridDistanceIndices, - real* subgridDistances, - unsigned int numberOfBCnodes, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - unsigned long long numberOfLBnodes, - bool isEvenTimestep) +__global__ void BBDevice27( + real* distributions, + int* subgridDistanceIndices, + real* subgridDistances, + unsigned int numberOfBCnodes, + unsigned int* neighborX, + unsigned int* 
neighborY, + unsigned int* neighborZ, + unsigned long long numberOfLBnodes, + bool isEvenTimestep) { ////////////////////////////////////////////////////////////////////////// //! The no-slip boundary condition is executed in the following steps @@ -2414,18 +2428,11 @@ __global__ void BBDevice27(real* distributions, //////////////////////////////////////////////////////////////////////////////// //! - Get node index coordinates from threadIdx, blockIdx, blockDim and gridDim. //! - const unsigned x = threadIdx.x; // global x-index - const unsigned y = blockIdx.x; // global y-index - const unsigned z = blockIdx.y; // global z-index - - const unsigned nx = blockDim.x; - const unsigned ny = gridDim.x; - - const unsigned k = nx*(ny*z + y) + x; + const unsigned nodeIndex = getNodeIndex(); ////////////////////////////////////////////////////////////////////////// // run for all indices in size of boundary condition (numberOfBCnodes) - if(k < numberOfBCnodes) + if(nodeIndex < numberOfBCnodes) { ////////////////////////////////////////////////////////////////////////// //! - Read distributions: style of reading and writing the distributions from/to stored arrays dependent on timestep is based on the esoteric twist algorithm \ref @@ -2443,7 +2450,7 @@ __global__ void BBDevice27(real* distributions, //////////////////////////////////////////////////////////////////////////////// //! - Set neighbor indices (necessary for indirect addressing) //! - unsigned int indexOfBCnode = subgridDistanceIndices[k]; + unsigned int indexOfBCnode = subgridDistanceIndices[nodeIndex]; unsigned int ke = indexOfBCnode; unsigned int kw = neighborX[indexOfBCnode]; unsigned int kn = indexOfBCnode; @@ -2509,32 +2516,32 @@ __global__ void BBDevice27(real* distributions, //////////////////////////////////////////////////////////////////////////////// //! 
- rewrite distributions if there is a sub-grid distance (q) in same direction real q; - q = (subgridD.q[DIR_P00])[k]; if (q>=c0o1 && q<=c1o1) (dist.f[DIR_M00])[kw ]=f_E ; - q = (subgridD.q[DIR_M00])[k]; if (q>=c0o1 && q<=c1o1) (dist.f[DIR_P00])[ke ]=f_W ; - q = (subgridD.q[DIR_0P0])[k]; if (q>=c0o1 && q<=c1o1) (dist.f[DIR_0M0])[ks ]=f_N ; - q = (subgridD.q[DIR_0M0])[k]; if (q>=c0o1 && q<=c1o1) (dist.f[DIR_0P0])[kn ]=f_S ; - q = (subgridD.q[DIR_00P])[k]; if (q>=c0o1 && q<=c1o1) (dist.f[DIR_00M])[kb ]=f_T ; - q = (subgridD.q[DIR_00M])[k]; if (q>=c0o1 && q<=c1o1) (dist.f[DIR_00P])[kt ]=f_B ; - q = (subgridD.q[DIR_PP0])[k]; if (q>=c0o1 && q<=c1o1) (dist.f[DIR_MM0])[ksw ]=f_NE ; - q = (subgridD.q[DIR_MM0])[k]; if (q>=c0o1 && q<=c1o1) (dist.f[DIR_PP0])[kne ]=f_SW ; - q = (subgridD.q[DIR_PM0])[k]; if (q>=c0o1 && q<=c1o1) (dist.f[DIR_MP0])[knw ]=f_SE ; - q = (subgridD.q[DIR_MP0])[k]; if (q>=c0o1 && q<=c1o1) (dist.f[DIR_PM0])[kse ]=f_NW ; - q = (subgridD.q[DIR_P0P])[k]; if (q>=c0o1 && q<=c1o1) (dist.f[DIR_M0M])[kbw ]=f_TE ; - q = (subgridD.q[DIR_M0M])[k]; if (q>=c0o1 && q<=c1o1) (dist.f[DIR_P0P])[kte ]=f_BW ; - q = (subgridD.q[DIR_P0M])[k]; if (q>=c0o1 && q<=c1o1) (dist.f[DIR_M0P])[ktw ]=f_BE ; - q = (subgridD.q[DIR_M0P])[k]; if (q>=c0o1 && q<=c1o1) (dist.f[DIR_P0M])[kbe ]=f_TW ; - q = (subgridD.q[DIR_0PP])[k]; if (q>=c0o1 && q<=c1o1) (dist.f[DIR_0MM])[kbs ]=f_TN ; - q = (subgridD.q[DIR_0MM])[k]; if (q>=c0o1 && q<=c1o1) (dist.f[DIR_0PP])[ktn ]=f_BS ; - q = (subgridD.q[DIR_0PM])[k]; if (q>=c0o1 && q<=c1o1) (dist.f[DIR_0MP])[kts ]=f_BN ; - q = (subgridD.q[DIR_0MP])[k]; if (q>=c0o1 && q<=c1o1) (dist.f[DIR_0PM])[kbn ]=f_TS ; - q = (subgridD.q[DIR_PPP])[k]; if (q>=c0o1 && q<=c1o1) (dist.f[DIR_MMM])[kbsw]=f_TNE; - q = (subgridD.q[DIR_MMM])[k]; if (q>=c0o1 && q<=c1o1) (dist.f[DIR_PPP])[ktne]=f_BSW; - q = (subgridD.q[DIR_PPM])[k]; if (q>=c0o1 && q<=c1o1) (dist.f[DIR_MMP])[ktsw]=f_BNE; - q = (subgridD.q[DIR_MMP])[k]; if (q>=c0o1 && q<=c1o1) (dist.f[DIR_PPM])[kbne]=f_TSW; - q = 
(subgridD.q[DIR_PMP])[k]; if (q>=c0o1 && q<=c1o1) (dist.f[DIR_MPM])[kbnw]=f_TSE; - q = (subgridD.q[DIR_MPM])[k]; if (q>=c0o1 && q<=c1o1) (dist.f[DIR_PMP])[ktse]=f_BNW; - q = (subgridD.q[DIR_PMM])[k]; if (q>=c0o1 && q<=c1o1) (dist.f[DIR_MPP])[ktnw]=f_BSE; - q = (subgridD.q[DIR_MPP])[k]; if (q>=c0o1 && q<=c1o1) (dist.f[DIR_PMM])[kbse]=f_TNW; + q = (subgridD.q[DIR_P00])[nodeIndex]; if (q>=c0o1 && q<=c1o1) (dist.f[DIR_M00])[kw ]=f_E ; + q = (subgridD.q[DIR_M00])[nodeIndex]; if (q>=c0o1 && q<=c1o1) (dist.f[DIR_P00])[ke ]=f_W ; + q = (subgridD.q[DIR_0P0])[nodeIndex]; if (q>=c0o1 && q<=c1o1) (dist.f[DIR_0M0])[ks ]=f_N ; + q = (subgridD.q[DIR_0M0])[nodeIndex]; if (q>=c0o1 && q<=c1o1) (dist.f[DIR_0P0])[kn ]=f_S ; + q = (subgridD.q[DIR_00P])[nodeIndex]; if (q>=c0o1 && q<=c1o1) (dist.f[DIR_00M])[kb ]=f_T ; + q = (subgridD.q[DIR_00M])[nodeIndex]; if (q>=c0o1 && q<=c1o1) (dist.f[DIR_00P])[kt ]=f_B ; + q = (subgridD.q[DIR_PP0])[nodeIndex]; if (q>=c0o1 && q<=c1o1) (dist.f[DIR_MM0])[ksw ]=f_NE ; + q = (subgridD.q[DIR_MM0])[nodeIndex]; if (q>=c0o1 && q<=c1o1) (dist.f[DIR_PP0])[kne ]=f_SW ; + q = (subgridD.q[DIR_PM0])[nodeIndex]; if (q>=c0o1 && q<=c1o1) (dist.f[DIR_MP0])[knw ]=f_SE ; + q = (subgridD.q[DIR_MP0])[nodeIndex]; if (q>=c0o1 && q<=c1o1) (dist.f[DIR_PM0])[kse ]=f_NW ; + q = (subgridD.q[DIR_P0P])[nodeIndex]; if (q>=c0o1 && q<=c1o1) (dist.f[DIR_M0M])[kbw ]=f_TE ; + q = (subgridD.q[DIR_M0M])[nodeIndex]; if (q>=c0o1 && q<=c1o1) (dist.f[DIR_P0P])[kte ]=f_BW ; + q = (subgridD.q[DIR_P0M])[nodeIndex]; if (q>=c0o1 && q<=c1o1) (dist.f[DIR_M0P])[ktw ]=f_BE ; + q = (subgridD.q[DIR_M0P])[nodeIndex]; if (q>=c0o1 && q<=c1o1) (dist.f[DIR_P0M])[kbe ]=f_TW ; + q = (subgridD.q[DIR_0PP])[nodeIndex]; if (q>=c0o1 && q<=c1o1) (dist.f[DIR_0MM])[kbs ]=f_TN ; + q = (subgridD.q[DIR_0MM])[nodeIndex]; if (q>=c0o1 && q<=c1o1) (dist.f[DIR_0PP])[ktn ]=f_BS ; + q = (subgridD.q[DIR_0PM])[nodeIndex]; if (q>=c0o1 && q<=c1o1) (dist.f[DIR_0MP])[kts ]=f_BN ; + q = (subgridD.q[DIR_0MP])[nodeIndex]; if (q>=c0o1 && 
q<=c1o1) (dist.f[DIR_0PM])[kbn ]=f_TS ; + q = (subgridD.q[DIR_PPP])[nodeIndex]; if (q>=c0o1 && q<=c1o1) (dist.f[DIR_MMM])[kbsw]=f_TNE; + q = (subgridD.q[DIR_MMM])[nodeIndex]; if (q>=c0o1 && q<=c1o1) (dist.f[DIR_PPP])[ktne]=f_BSW; + q = (subgridD.q[DIR_PPM])[nodeIndex]; if (q>=c0o1 && q<=c1o1) (dist.f[DIR_MMP])[ktsw]=f_BNE; + q = (subgridD.q[DIR_MMP])[nodeIndex]; if (q>=c0o1 && q<=c1o1) (dist.f[DIR_PPM])[kbne]=f_TSW; + q = (subgridD.q[DIR_PMP])[nodeIndex]; if (q>=c0o1 && q<=c1o1) (dist.f[DIR_MPM])[kbnw]=f_TSE; + q = (subgridD.q[DIR_MPM])[nodeIndex]; if (q>=c0o1 && q<=c1o1) (dist.f[DIR_PMP])[ktse]=f_BNW; + q = (subgridD.q[DIR_PMM])[nodeIndex]; if (q>=c0o1 && q<=c1o1) (dist.f[DIR_MPP])[ktnw]=f_BSE; + q = (subgridD.q[DIR_MPP])[nodeIndex]; if (q>=c0o1 && q<=c1o1) (dist.f[DIR_PMM])[kbse]=f_TNW; } } //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/src/gpu/VirtualFluids_GPU/GPU/PrecursorBCs27.cu b/src/gpu/VirtualFluids_GPU/GPU/PrecursorBCs27.cu index b17ffefd13a8a3a6048dde69ffb1db6c5def23e1..177eb41587896dd7993b06f98a1506abfc4f3f5f 100644 --- a/src/gpu/VirtualFluids_GPU/GPU/PrecursorBCs27.cu +++ b/src/gpu/VirtualFluids_GPU/GPU/PrecursorBCs27.cu @@ -1,53 +1,89 @@ +//======================================================================================= +// ____ ____ __ ______ __________ __ __ __ __ +// \ \ | | | | | _ \ |___ ___| | | | | / \ | | +// \ \ | | | | | |_) | | | | | | | / \ | | +// \ \ | | | | | _ / | | | | | | / /\ \ | | +// \ \ | | | | | | \ \ | | | \__/ | / ____ \ | |____ +// \ \ | | |__| |__| \__\ |__| \________/ /__/ \__\ |_______| +// \ \ | | ________________________________________________________________ +// \ \ | | | ______________________________________________________________| +// \ \| | | | __ __ __ __ ______ _______ +// \ | | |_____ | | | | | | | | | _ \ / _____) +// \ | | _____| | | | | | | | | | | \ \ \_______ +// \ | | | | |_____ | \_/ 
| | | | |_/ / _____ | +// \ _____| |__| |________| \_______/ |__| |______/ (_______/ +// +// This file is part of VirtualFluids. VirtualFluids is free software: you can +// redistribute it and/or modify it under the terms of the GNU General Public +// License as published by the Free Software Foundation, either version 3 of +// the License, or (at your option) any later version. +// +// VirtualFluids is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// for more details. +// +// You should have received a copy of the GNU General Public License along +// with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>. +// +//! \file PrecursorBCs27.cu +//! \ingroup GPU +//! \author Henry Korb, Henrik Asmuth +//====================================================================================== #include "LBM/LB.h" #include <lbm/constants/NumericConstants.h> #include <lbm/constants/D3Q27.h> #include <lbm/MacroscopicQuantities.h> -#include "VirtualFluids_GPU/Kernel/Utilities/DistributionHelper.cuh" -#include "VirtualFluids_GPU/GPU/KernelUtilities.h" +#include "LBM/GPUHelperFunctions/KernelUtilities.h" using namespace vf::lbm::constant; using namespace vf::lbm::dir; - -__global__ void QPrecursorDeviceCompZeroPress( int* subgridDistanceIndices, - int numberOfBCnodes, - int numberOfPrecursorNodes, - int sizeQ, - real omega, - real* distributions, - real* subgridDistances, - uint* neighborX, - uint* neighborY, - uint* neighborZ, - uint* neighbors0PP, - uint* neighbors0PM, - uint* neighbors0MP, - uint* neighbors0MM, - real* weights0PP, - real* weights0PM, - real* weights0MP, - real* weights0MM, - real* vLast, - real* vCurrent, - real velocityX, - real velocityY, - real velocityZ, - real timeRatio, - real velocityRatio, - unsigned long long numberOfLBnodes, - bool isEvenTimestep) +using namespace 
vf::gpu; + +__global__ void QPrecursorDeviceCompZeroPress( + int* subgridDistanceIndices, + int numberOfBCnodes, + int numberOfPrecursorNodes, + int sizeQ, + real omega, + real* distributions, + real* subgridDistances, + uint* neighborX, + uint* neighborY, + uint* neighborZ, + uint* neighbors0PP, + uint* neighbors0PM, + uint* neighbors0MP, + uint* neighbors0MM, + real* weights0PP, + real* weights0PM, + real* weights0MP, + real* weights0MM, + real* vLast, + real* vCurrent, + real velocityX, + real velocityY, + real velocityZ, + real timeRatio, + real velocityRatio, + unsigned long long numberOfLBnodes, + bool isEvenTimestep) { - const unsigned k = vf::gpu::getNodeIndex(); + //////////////////////////////////////////////////////////////////////////////// + //! - Get node index coordinates from threadIdx, blockIdx, blockDim and gridDim. + //! + const unsigned nodeIndex = getNodeIndex(); - if(k>=numberOfBCnodes) return; + if(nodeIndex>=numberOfBCnodes) return; //////////////////////////////////////////////////////////////////////////////// // interpolation of velocity real vxLastInterpd, vyLastInterpd, vzLastInterpd; real vxNextInterpd, vyNextInterpd, vzNextInterpd; - uint kNeighbor0PP = neighbors0PP[k]; - real d0PP = weights0PP[k]; + uint kNeighbor0PP = neighbors0PP[nodeIndex]; + real d0PP = weights0PP[nodeIndex]; real* vxLast = vLast; real* vyLast = &vLast[numberOfPrecursorNodes]; @@ -59,13 +95,13 @@ __global__ void QPrecursorDeviceCompZeroPress( int* subgridDistanceIndices, if(d0PP < 1e6) { - uint kNeighbor0PM = neighbors0PM[k]; - uint kNeighbor0MP = neighbors0MP[k]; - uint kNeighbor0MM = neighbors0MM[k]; + uint kNeighbor0PM = neighbors0PM[nodeIndex]; + uint kNeighbor0MP = neighbors0MP[nodeIndex]; + uint kNeighbor0MM = neighbors0MM[nodeIndex]; - real d0PM = weights0PM[k]; - real d0MP = weights0MP[k]; - real d0MM = weights0MM[k]; + real d0PM = weights0PM[nodeIndex]; + real d0MP = weights0MP[nodeIndex]; + real d0MM = weights0MM[nodeIndex]; real invWeightSum = 
1.f/(d0PP+d0PM+d0MP+d0MM); @@ -95,10 +131,15 @@ __global__ void QPrecursorDeviceCompZeroPress( int* subgridDistanceIndices, // From here on just a copy of QVelDeviceCompZeroPress //////////////////////////////////////////////////////////////////////////////// + ////////////////////////////////////////////////////////////////////////// + //! - Read distributions: style of reading and writing the distributions from/to stored arrays dependent on timestep + //! is based on the esoteric twist algorithm \ref <a href="https://doi.org/10.3390/computation5020019"><b>[ M. Geier + //! et al. (2017), DOI:10.3390/computation5020019 ]</b></a> + //! Distributions27 dist; getPointersToDistributions(dist, distributions, numberOfLBnodes, isEvenTimestep); - unsigned int KQK = subgridDistanceIndices[k]; + unsigned int KQK = subgridDistanceIndices[nodeIndex]; unsigned int k000= KQK; unsigned int kP00 = KQK; unsigned int kM00 = neighborX[KQK]; @@ -187,7 +228,7 @@ __global__ void QPrecursorDeviceCompZeroPress( int* subgridDistanceIndices, //////////////////////////////////////////////////////////////////////////////// //! 
- Update distributions with subgrid distance (q) between zero and one real feq, q, velocityLB, velocityBC; - q = (subgridD.q[DIR_P00])[k]; + q = (subgridD.q[DIR_P00])[nodeIndex]; if (q>=c0o1 && q<=c1o1) // only update distribution for q between zero and one { velocityLB = vx1; @@ -196,7 +237,7 @@ __global__ void QPrecursorDeviceCompZeroPress( int* subgridDistanceIndices, (dist.f[DIR_M00])[kM00] = getInterpolatedDistributionForVeloWithPressureBC(q, f_P00, f_M00, feq, omega, drho, velocityBC, c2o27); } - q = (subgridD.q[DIR_M00])[k]; + q = (subgridD.q[DIR_M00])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { velocityLB = -vx1; @@ -205,7 +246,7 @@ __global__ void QPrecursorDeviceCompZeroPress( int* subgridDistanceIndices, (dist.f[DIR_P00])[kP00] = getInterpolatedDistributionForVeloWithPressureBC(q, f_M00, f_P00, feq, omega, drho, velocityBC, c2o27); } - q = (subgridD.q[DIR_0P0])[k]; + q = (subgridD.q[DIR_0P0])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { velocityLB = vx2; @@ -214,7 +255,7 @@ __global__ void QPrecursorDeviceCompZeroPress( int* subgridDistanceIndices, (dist.f[DIR_0M0])[DIR_0M0] = getInterpolatedDistributionForVeloWithPressureBC(q, f_0P0, f_0M0, feq, omega, drho, velocityBC, c2o27); } - q = (subgridD.q[DIR_0M0])[k]; + q = (subgridD.q[DIR_0M0])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { velocityLB = -vx2; @@ -223,7 +264,7 @@ __global__ void QPrecursorDeviceCompZeroPress( int* subgridDistanceIndices, (dist.f[DIR_0P0])[k0P0] = getInterpolatedDistributionForVeloWithPressureBC(q, f_0M0, f_0P0, feq, omega, drho, velocityBC, c2o27); } - q = (subgridD.q[DIR_00P])[k]; + q = (subgridD.q[DIR_00P])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { velocityLB = vx3; @@ -232,7 +273,7 @@ __global__ void QPrecursorDeviceCompZeroPress( int* subgridDistanceIndices, (dist.f[DIR_00M])[k00M] = getInterpolatedDistributionForVeloWithPressureBC(q, f_00P, f_00M, feq, omega, drho, velocityBC, c2o27); } - q = (subgridD.q[DIR_00M])[k]; + q = (subgridD.q[DIR_00M])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { velocityLB = 
-vx3; @@ -241,7 +282,7 @@ __global__ void QPrecursorDeviceCompZeroPress( int* subgridDistanceIndices, (dist.f[DIR_00P])[k00P] = getInterpolatedDistributionForVeloWithPressureBC(q, f_00M, f_00P, feq, omega, drho, velocityBC, c2o27); } - q = (subgridD.q[DIR_PP0])[k]; + q = (subgridD.q[DIR_PP0])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { velocityLB = vx1 + vx2; @@ -250,7 +291,7 @@ __global__ void QPrecursorDeviceCompZeroPress( int* subgridDistanceIndices, (dist.f[DIR_MM0])[kMM0] = getInterpolatedDistributionForVeloWithPressureBC(q, f_PP0, f_MM0, feq, omega, drho, velocityBC, c1o54); } - q = (subgridD.q[DIR_MM0])[k]; + q = (subgridD.q[DIR_MM0])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { velocityLB = -vx1 - vx2; @@ -259,7 +300,7 @@ __global__ void QPrecursorDeviceCompZeroPress( int* subgridDistanceIndices, (dist.f[DIR_PP0])[kPP0] = getInterpolatedDistributionForVeloWithPressureBC(q, f_MM0, f_PP0, feq, omega, drho, velocityBC, c1o54); } - q = (subgridD.q[DIR_PM0])[k]; + q = (subgridD.q[DIR_PM0])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { velocityLB = vx1 - vx2; @@ -268,7 +309,7 @@ __global__ void QPrecursorDeviceCompZeroPress( int* subgridDistanceIndices, (dist.f[DIR_MP0])[kMP0] = getInterpolatedDistributionForVeloWithPressureBC(q, f_PM0, f_MP0, feq, omega, drho, velocityBC, c1o54); } - q = (subgridD.q[DIR_MP0])[k]; + q = (subgridD.q[DIR_MP0])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { velocityLB = -vx1 + vx2; @@ -277,7 +318,7 @@ __global__ void QPrecursorDeviceCompZeroPress( int* subgridDistanceIndices, (dist.f[DIR_PM0])[kPM0] = getInterpolatedDistributionForVeloWithPressureBC(q, f_MP0, f_PM0, feq, omega, drho, velocityBC, c1o54); } - q = (subgridD.q[DIR_P0P])[k]; + q = (subgridD.q[DIR_P0P])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { velocityLB = vx1 + vx3; @@ -286,7 +327,7 @@ __global__ void QPrecursorDeviceCompZeroPress( int* subgridDistanceIndices, (dist.f[DIR_M0M])[kM0M] = getInterpolatedDistributionForVeloWithPressureBC(q, f_P0P, f_M0M, feq, omega, drho, velocityBC, c1o54); } - q = 
(subgridD.q[DIR_M0M])[k]; + q = (subgridD.q[DIR_M0M])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { velocityLB = -vx1 - vx3; @@ -295,7 +336,7 @@ __global__ void QPrecursorDeviceCompZeroPress( int* subgridDistanceIndices, (dist.f[DIR_P0P])[kP0P] = getInterpolatedDistributionForVeloWithPressureBC(q, f_M0M, f_P0P, feq, omega, drho, velocityBC, c1o54); } - q = (subgridD.q[DIR_P0M])[k]; + q = (subgridD.q[DIR_P0M])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { velocityLB = vx1 - vx3; @@ -304,7 +345,7 @@ __global__ void QPrecursorDeviceCompZeroPress( int* subgridDistanceIndices, (dist.f[DIR_M0P])[kM0P] = getInterpolatedDistributionForVeloWithPressureBC(q, f_P0M, f_M0P, feq, omega, drho, velocityBC, c1o54); } - q = (subgridD.q[DIR_M0P])[k]; + q = (subgridD.q[DIR_M0P])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { velocityLB = -vx1 + vx3; @@ -313,7 +354,7 @@ __global__ void QPrecursorDeviceCompZeroPress( int* subgridDistanceIndices, (dist.f[DIR_P0M])[kP0M] = getInterpolatedDistributionForVeloWithPressureBC(q, f_M0P, f_P0M, feq, omega, drho, velocityBC, c1o54); } - q = (subgridD.q[DIR_0PP])[k]; + q = (subgridD.q[DIR_0PP])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { velocityLB = vx2 + vx3; @@ -322,7 +363,7 @@ __global__ void QPrecursorDeviceCompZeroPress( int* subgridDistanceIndices, (dist.f[DIR_0MM])[k0MM] = getInterpolatedDistributionForVeloWithPressureBC(q, f_0PP, f_0MM, feq, omega, drho, velocityBC, c1o54); } - q = (subgridD.q[DIR_0MM])[k]; + q = (subgridD.q[DIR_0MM])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { velocityLB = -vx2 - vx3; @@ -331,7 +372,7 @@ __global__ void QPrecursorDeviceCompZeroPress( int* subgridDistanceIndices, (dist.f[DIR_0PP])[k0PP] = getInterpolatedDistributionForVeloWithPressureBC(q, f_0MM, f_0PP, feq, omega, drho, velocityBC, c1o54); } - q = (subgridD.q[DIR_0PM])[k]; + q = (subgridD.q[DIR_0PM])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { velocityLB = vx2 - vx3; @@ -340,7 +381,7 @@ __global__ void QPrecursorDeviceCompZeroPress( int* subgridDistanceIndices, (dist.f[DIR_0MP])[k0MP] = 
getInterpolatedDistributionForVeloWithPressureBC(q, f_0PM, f_0PP, feq, omega, drho, velocityBC, c1o54); } - q = (subgridD.q[DIR_0MP])[k]; + q = (subgridD.q[DIR_0MP])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { velocityLB = -vx2 + vx3; @@ -349,7 +390,7 @@ __global__ void QPrecursorDeviceCompZeroPress( int* subgridDistanceIndices, (dist.f[DIR_0PM])[k0PM] = getInterpolatedDistributionForVeloWithPressureBC(q, f_0PP, f_0PM, feq, omega, drho, velocityBC, c1o54); } - q = (subgridD.q[DIR_PPP])[k]; + q = (subgridD.q[DIR_PPP])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { velocityLB = vx1 + vx2 + vx3; @@ -358,7 +399,7 @@ __global__ void QPrecursorDeviceCompZeroPress( int* subgridDistanceIndices, (dist.f[DIR_MMM])[kMMM] = getInterpolatedDistributionForVeloWithPressureBC(q, f_PPP, f_MMM, feq, omega, drho, velocityBC, c1o216); } - q = (subgridD.q[DIR_MMM])[k]; + q = (subgridD.q[DIR_MMM])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { velocityLB = -vx1 - vx2 - vx3; @@ -367,7 +408,7 @@ __global__ void QPrecursorDeviceCompZeroPress( int* subgridDistanceIndices, (dist.f[DIR_PPP])[kPPP] = getInterpolatedDistributionForVeloWithPressureBC(q, f_MMM, f_PPP, feq, omega, drho, velocityBC, c1o216); } - q = (subgridD.q[DIR_PPM])[k]; + q = (subgridD.q[DIR_PPM])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { velocityLB = vx1 + vx2 - vx3; @@ -376,7 +417,7 @@ __global__ void QPrecursorDeviceCompZeroPress( int* subgridDistanceIndices, (dist.f[DIR_MMP])[kMMP] = getInterpolatedDistributionForVeloWithPressureBC(q, f_PPM, f_MMP, feq, omega, drho, velocityBC, c1o216); } - q = (subgridD.q[DIR_MMP])[k]; + q = (subgridD.q[DIR_MMP])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { velocityLB = -vx1 - vx2 + vx3; @@ -385,7 +426,7 @@ __global__ void QPrecursorDeviceCompZeroPress( int* subgridDistanceIndices, (dist.f[DIR_PPM])[kPPM] = getInterpolatedDistributionForVeloWithPressureBC(q, f_MMP, f_PPM, feq, omega, drho, velocityBC, c1o216); } - q = (subgridD.q[DIR_PMP])[k]; + q = (subgridD.q[DIR_PMP])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { velocityLB = 
vx1 - vx2 + vx3; @@ -394,7 +435,7 @@ __global__ void QPrecursorDeviceCompZeroPress( int* subgridDistanceIndices, (dist.f[DIR_MPM])[kMPM] = getInterpolatedDistributionForVeloWithPressureBC(q, f_PMP, f_MPM, feq, omega, drho, velocityBC, c1o216); } - q = (subgridD.q[DIR_MPM])[k]; + q = (subgridD.q[DIR_MPM])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { velocityLB = -vx1 + vx2 - vx3; @@ -403,7 +444,7 @@ __global__ void QPrecursorDeviceCompZeroPress( int* subgridDistanceIndices, (dist.f[DIR_PMP])[kPMP] = getInterpolatedDistributionForVeloWithPressureBC(q, f_MPM, f_PMP, feq, omega, drho, velocityBC, c1o216); } - q = (subgridD.q[DIR_PMM])[k]; + q = (subgridD.q[DIR_PMM])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { velocityLB = vx1 - vx2 - vx3; @@ -412,7 +453,7 @@ __global__ void QPrecursorDeviceCompZeroPress( int* subgridDistanceIndices, (dist.f[DIR_MPP])[kMPP] = getInterpolatedDistributionForVeloWithPressureBC(q, f_PMM, f_MPP, feq, omega, drho, velocityBC, c1o216); } - q = (subgridD.q[DIR_MPP])[k]; + q = (subgridD.q[DIR_MPP])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { velocityLB = -vx1 + vx2 + vx3; @@ -424,43 +465,89 @@ __global__ void QPrecursorDeviceCompZeroPress( int* subgridDistanceIndices, //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -__global__ void PrecursorDeviceEQ27( int* subgridDistanceIndices, - int numberOfBCnodes, - int numberOfPrecursorNodes, - real omega, - real* distributions, - uint* neighborX, - uint* neighborY, - uint* neighborZ, - uint* neighbors0PP, - uint* neighbors0PM, - uint* neighbors0MP, - uint* neighbors0MM, - real* weights0PP, - real* weights0PM, - real* weights0MP, - real* weights0MM, - real* vLast, - real* vCurrent, - real velocityX, - real velocityY, - real velocityZ, - real timeRatio, - real velocityRatio, - unsigned long long numberOfLBnodes, - bool isEvenTimestep) + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +__global__ void PrecursorDeviceEQ27( + int *subgridDistanceIndices, + int numberOfBCnodes, + int numberOfPrecursorNodes, + real omega, + real* distributions, + uint* neighborX, + uint* neighborY, + uint* neighborZ, + uint* neighbors0PP, + uint* neighbors0PM, + uint* neighbors0MP, + uint* neighbors0MM, + real* weights0PP, + real* weights0PM, + real* weights0MP, + real* weights0MM, + real* vLast, + real* vCurrent, + real velocityX, + real velocityY, + real velocityZ, + real timeRatio, + real velocityRatio, + unsigned long long numberOfLBnodes, + bool isEvenTimestep) { - const unsigned k = vf::gpu::getNodeIndex(); + //////////////////////////////////////////////////////////////////////////////// + //! - Get node index coordinates from threadIdx, blockIdx, blockDim and gridDim. + //! + const unsigned nodeIndex = getNodeIndex(); - if(k>=numberOfBCnodes) return; + if(nodeIndex>=numberOfBCnodes) return; //////////////////////////////////////////////////////////////////////////////// // interpolation of velocity real vxLastInterpd, vyLastInterpd, vzLastInterpd; real vxNextInterpd, vyNextInterpd, vzNextInterpd; - uint kNeighbor0PP = neighbors0PP[k]; - real d0PP = weights0PP[k]; + uint kNeighbor0PP = neighbors0PP[nodeIndex]; + real d0PP = weights0PP[nodeIndex]; real* vxLast = vLast; real* vyLast = &vLast[numberOfPrecursorNodes]; @@ -472,13 +559,13 @@ __global__ void PrecursorDeviceEQ27( int* subgridDistanceIndices, if(d0PP < 1e6) { - uint kNeighbor0PM = neighbors0PM[k]; - uint kNeighbor0MP = neighbors0MP[k]; - uint kNeighbor0MM = neighbors0MM[k]; + uint kNeighbor0PM = neighbors0PM[nodeIndex]; + uint kNeighbor0MP = neighbors0MP[nodeIndex]; + uint kNeighbor0MM = neighbors0MM[nodeIndex]; - real d0PM = weights0PM[k]; - real d0MP = weights0MP[k]; - real d0MM = weights0MM[k]; + real d0PM = weights0PM[nodeIndex]; + real d0MP = 
weights0MP[nodeIndex]; + real d0MM = weights0MM[nodeIndex]; real invWeightSum = 1.f/(d0PP+d0PM+d0MP+d0MM); @@ -508,10 +595,15 @@ __global__ void PrecursorDeviceEQ27( int* subgridDistanceIndices, // From here on just a copy of QVelDeviceCompZeroPress //////////////////////////////////////////////////////////////////////////////// + ////////////////////////////////////////////////////////////////////////// + //! - Read distributions: style of reading and writing the distributions from/to stored arrays dependent on timestep + //! is based on the esoteric twist algorithm \ref <a href="https://doi.org/10.3390/computation5020019"><b>[ M. Geier + //! et al. (2017), DOI:10.3390/computation5020019 ]</b></a> + //! Distributions27 dist; getPointersToDistributions(dist, distributions, numberOfLBnodes, !isEvenTimestep); - unsigned int KQK = subgridDistanceIndices[k]; //QK + unsigned int KQK = subgridDistanceIndices[nodeIndex]; //QK unsigned int k000 = KQK; //000 unsigned int kP00 = KQK; //P00 unsigned int kM00 = neighborX[KQK]; //M00 @@ -649,33 +741,73 @@ __global__ void PrecursorDeviceEQ27( int* subgridDistanceIndices, //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -__global__ void PrecursorDeviceDistributions( int* subgridDistanceIndices, - int numberOfBCnodes, - int numberOfPrecursorNodes, - real* distributions, - uint* neighborX, - uint* neighborY, - uint* neighborZ, - uint* neighbors0PP, - uint* neighbors0PM, - uint* neighbors0MP, - uint* neighbors0MM, - real* weights0PP, - real* weights0PM, - real* weights0MP, - real* weights0MM, - real* fsLast, - real* fsNext, - real timeRatio, - unsigned long long numberOfLBnodes, - bool isEvenTimestep) + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +__global__ void 
PrecursorDeviceDistributions( + int *subgridDistanceIndices, + int numberOfBCnodes, + int numberOfPrecursorNodes, + real* distributions, + uint* neighborX, + uint* neighborY, + uint* neighborZ, + uint* neighbors0PP, + uint* neighbors0PM, + uint* neighbors0MP, + uint* neighbors0MM, + real* weights0PP, + real* weights0PM, + real* weights0MP, + real* weights0MM, + real* fsLast, + real* fsNext, + real timeRatio, + unsigned long long numberOfLBnodes, + bool isEvenTimestep) { - const unsigned k = vf::gpu::getNodeIndex(); + //////////////////////////////////////////////////////////////////////////////// + //! - Get node index coordinates from threadIdx, blockIdx, blockDim and gridDim. + //! + const unsigned nodeIndex = getNodeIndex(); - if(k>=numberOfBCnodes) return; + if(nodeIndex>=numberOfBCnodes) return; - uint kNeighbor0PP = neighbors0PP[k]; - real d0PP = weights0PP[k]; + uint kNeighbor0PP = neighbors0PP[nodeIndex]; + real d0PP = weights0PP[nodeIndex]; real f0LastInterp, f1LastInterp, f2LastInterp, f3LastInterp, f4LastInterp, f5LastInterp, f6LastInterp, f7LastInterp, f8LastInterp; real f0NextInterp, f1NextInterp, f2NextInterp, f3NextInterp, f4NextInterp, f5NextInterp, f6NextInterp, f7NextInterp, f8NextInterp; @@ -703,13 +835,13 @@ __global__ void PrecursorDeviceDistributions( int* subgridDistanceIndices, if(d0PP<1e6) { - uint kNeighbor0PM = neighbors0PM[k]; - uint kNeighbor0MP = neighbors0MP[k]; - uint kNeighbor0MM = neighbors0MM[k]; + uint kNeighbor0PM = neighbors0PM[nodeIndex]; + uint kNeighbor0MP = neighbors0MP[nodeIndex]; + uint kNeighbor0MM = neighbors0MM[nodeIndex]; - real d0PM = weights0PM[k]; - real d0MP = weights0MP[k]; - real d0MM = weights0MM[k]; + real d0PM = weights0PM[nodeIndex]; + real d0MP = weights0MP[nodeIndex]; + real d0MM = weights0MM[nodeIndex]; real invWeightSum = 1.f/(d0PP+d0PM+d0MP+d0MM); @@ -761,10 +893,15 @@ __global__ void PrecursorDeviceDistributions( int* subgridDistanceIndices, f7NextInterp = f7Next[kNeighbor0PP]; f8NextInterp = 
f8Next[kNeighbor0PP]; } + ////////////////////////////////////////////////////////////////////////// + //! - Read distributions: style of reading and writing the distributions from/to stored arrays dependent on timestep + //! is based on the esoteric twist algorithm \ref <a href="https://doi.org/10.3390/computation5020019"><b>[ M. Geier + //! et al. (2017), DOI:10.3390/computation5020019 ]</b></a> + //! Distributions27 dist; getPointersToDistributions(dist, distributions, numberOfLBnodes, !isEvenTimestep); - unsigned int KQK = subgridDistanceIndices[k]; + unsigned int KQK = subgridDistanceIndices[nodeIndex]; // unsigned int k000= KQK; unsigned int kP00 = KQK; // unsigned int kM00 = neighborX[KQK]; @@ -804,36 +941,84 @@ __global__ void PrecursorDeviceDistributions( int* subgridDistanceIndices, } //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//NOTE: Has not been tested after bug fix! -__global__ void QPrecursorDeviceDistributions( int* subgridDistanceIndices, - real* subgridDistances, - int sizeQ, - int numberOfBCnodes, - int numberOfPrecursorNodes, - real* distributions, - uint* neighborX, - uint* neighborY, - uint* neighborZ, - uint* neighbors0PP, - uint* neighbors0PM, - uint* neighbors0MP, - uint* neighbors0MM, - real* weights0PP, - real* weights0PM, - real* weights0MP, - real* weights0MM, - real* fsLast, - real* fsNext, - real timeRatio, - unsigned long long numberOfLBnodes, - bool isEvenTimestep) + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// NOTE: Has not been tested after bug fix! 
+__global__ void QPrecursorDeviceDistributions( + int* subgridDistanceIndices, + real* subgridDistances, + int sizeQ, + int numberOfBCnodes, + int numberOfPrecursorNodes, + real* distributions, + uint* neighborX, + uint* neighborY, + uint* neighborZ, + uint* neighbors0PP, + uint* neighbors0PM, + uint* neighbors0MP, + uint* neighbors0MM, + real* weights0PP, + real* weights0PM, + real* weights0MP, + real* weights0MM, + real* fsLast, + real* fsNext, + real timeRatio, + unsigned long long numberOfLBnodes, + bool isEvenTimestep) { - const unsigned k = vf::gpu::getNodeIndex(); + //////////////////////////////////////////////////////////////////////////////// + //! - Get node index coordinates from threadIdx, blockIdx, blockDim and gridDim. + //! + const unsigned nodeIndex = getNodeIndex(); - if(k>=numberOfBCnodes) return; + if(nodeIndex>=numberOfBCnodes) return; - uint kNeighbor0PP = neighbors0PP[k]; - real d0PP = weights0PP[k]; + uint kNeighbor0PP = neighbors0PP[nodeIndex]; + real d0PP = weights0PP[nodeIndex]; real f0LastInterp, f1LastInterp, f2LastInterp, f3LastInterp, f4LastInterp, f5LastInterp, f6LastInterp, f7LastInterp, f8LastInterp; real f0NextInterp, f1NextInterp, f2NextInterp, f3NextInterp, f4NextInterp, f5NextInterp, f6NextInterp, f7NextInterp, f8NextInterp; @@ -861,13 +1046,13 @@ __global__ void QPrecursorDeviceDistributions( int* subgridDistanceIndices, if(d0PP<1e6) { - uint kNeighbor0PM = neighbors0PM[k]; - uint kNeighbor0MP = neighbors0MP[k]; - uint kNeighbor0MM = neighbors0MM[k]; + uint kNeighbor0PM = neighbors0PM[nodeIndex]; + uint kNeighbor0MP = neighbors0MP[nodeIndex]; + uint kNeighbor0MM = neighbors0MM[nodeIndex]; - real d0PM = weights0PM[k]; - real d0MP = weights0MP[k]; - real d0MM = weights0MM[k]; + real d0PM = weights0PM[nodeIndex]; + real d0MP = weights0MP[nodeIndex]; + real d0MM = weights0MM[nodeIndex]; real invWeightSum = 1.f/(d0PP+d0PM+d0MP+d0MM); @@ -919,10 +1104,15 @@ __global__ void QPrecursorDeviceDistributions( int* subgridDistanceIndices, 
f7NextInterp = f7Next[kNeighbor0PP]; f8NextInterp = f8Next[kNeighbor0PP]; } + ////////////////////////////////////////////////////////////////////////// + //! - Read distributions: style of reading and writing the distributions from/to stored arrays dependent on timestep + //! is based on the esoteric twist algorithm \ref <a href="https://doi.org/10.3390/computation5020019"><b>[ M. Geier + //! et al. (2017), DOI:10.3390/computation5020019 ]</b></a> + //! Distributions27 dist; getPointersToDistributions(dist, distributions, numberOfLBnodes, !isEvenTimestep); - unsigned int KQK = subgridDistanceIndices[k]; + unsigned int KQK = subgridDistanceIndices[nodeIndex]; // unsigned int k000= KQK; unsigned int kP00 = KQK; // unsigned int kM00 = neighborX[KQK]; @@ -953,15 +1143,15 @@ __global__ void QPrecursorDeviceDistributions( int* subgridDistanceIndices, getPointersToSubgridDistances(qs, subgridDistances, sizeQ); real q; - q = qs.q[DIR_P00][k]; if(q>= c0o1 && q <= c1o1) dist.f[DIR_P00][kP00] = f0LastInterp*(1.f-timeRatio) + f0NextInterp*timeRatio; - q = qs.q[DIR_PP0][k]; if(q>= c0o1 && q <= c1o1) dist.f[DIR_PP0][kPP0] = f1LastInterp*(1.f-timeRatio) + f1NextInterp*timeRatio; - q = qs.q[DIR_PM0][k]; if(q>= c0o1 && q <= c1o1) dist.f[DIR_PM0][kPM0] = f2LastInterp*(1.f-timeRatio) + f2NextInterp*timeRatio; - q = qs.q[DIR_P0P][k]; if(q>= c0o1 && q <= c1o1) dist.f[DIR_P0P][kP0P] = f3LastInterp*(1.f-timeRatio) + f3NextInterp*timeRatio; - q = qs.q[DIR_P0M][k]; if(q>= c0o1 && q <= c1o1) dist.f[DIR_P0M][kP0M] = f4LastInterp*(1.f-timeRatio) + f4NextInterp*timeRatio; - q = qs.q[DIR_PPP][k]; if(q>= c0o1 && q <= c1o1) dist.f[DIR_PPP][kPPP] = f5LastInterp*(1.f-timeRatio) + f5NextInterp*timeRatio; - q = qs.q[DIR_PMP][k]; if(q>= c0o1 && q <= c1o1) dist.f[DIR_PMP][kPMP] = f6LastInterp*(1.f-timeRatio) + f6NextInterp*timeRatio; - q = qs.q[DIR_PPM][k]; if(q>= c0o1 && q <= c1o1) dist.f[DIR_PPM][kPPM] = f7LastInterp*(1.f-timeRatio) + f7NextInterp*timeRatio; - q = qs.q[DIR_PMM][k]; if(q>= c0o1 && q 
<= c1o1) dist.f[DIR_PMM][kPMM] = f8LastInterp*(1.f-timeRatio) + f8NextInterp*timeRatio; + q = qs.q[DIR_P00][nodeIndex]; if(q>= c0o1 && q <= c1o1) dist.f[DIR_P00][kP00] = f0LastInterp*(1.f-timeRatio) + f0NextInterp*timeRatio; + q = qs.q[DIR_PP0][nodeIndex]; if(q>= c0o1 && q <= c1o1) dist.f[DIR_PP0][kPP0] = f1LastInterp*(1.f-timeRatio) + f1NextInterp*timeRatio; + q = qs.q[DIR_PM0][nodeIndex]; if(q>= c0o1 && q <= c1o1) dist.f[DIR_PM0][kPM0] = f2LastInterp*(1.f-timeRatio) + f2NextInterp*timeRatio; + q = qs.q[DIR_P0P][nodeIndex]; if(q>= c0o1 && q <= c1o1) dist.f[DIR_P0P][kP0P] = f3LastInterp*(1.f-timeRatio) + f3NextInterp*timeRatio; + q = qs.q[DIR_P0M][nodeIndex]; if(q>= c0o1 && q <= c1o1) dist.f[DIR_P0M][kP0M] = f4LastInterp*(1.f-timeRatio) + f4NextInterp*timeRatio; + q = qs.q[DIR_PPP][nodeIndex]; if(q>= c0o1 && q <= c1o1) dist.f[DIR_PPP][kPPP] = f5LastInterp*(1.f-timeRatio) + f5NextInterp*timeRatio; + q = qs.q[DIR_PMP][nodeIndex]; if(q>= c0o1 && q <= c1o1) dist.f[DIR_PMP][kPMP] = f6LastInterp*(1.f-timeRatio) + f6NextInterp*timeRatio; + q = qs.q[DIR_PPM][nodeIndex]; if(q>= c0o1 && q <= c1o1) dist.f[DIR_PPM][kPPM] = f7LastInterp*(1.f-timeRatio) + f7NextInterp*timeRatio; + q = qs.q[DIR_PMM][nodeIndex]; if(q>= c0o1 && q <= c1o1) dist.f[DIR_PMM][kPMM] = f8LastInterp*(1.f-timeRatio) + f8NextInterp*timeRatio; } //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/src/gpu/VirtualFluids_GPU/GPU/PressBCs27.cu b/src/gpu/VirtualFluids_GPU/GPU/PressBCs27.cu index 08be4b187eff21fa1a1b071337f14cbcc29ba805..02cfd2bce3723162b645cef568c87ca3b1dd2720 100644 --- a/src/gpu/VirtualFluids_GPU/GPU/PressBCs27.cu +++ b/src/gpu/VirtualFluids_GPU/GPU/PressBCs27.cu @@ -1,27 +1,58 @@ -/* Device code */ +//======================================================================================= +// ____ ____ __ ______ __________ __ __ __ __ +// \ \ | | | | | _ \ |___ ___| | | | | / \ | | +// \ 
\ | | | | | |_) | | | | | | | / \ | | +// \ \ | | | | | _ / | | | | | | / /\ \ | | +// \ \ | | | | | | \ \ | | | \__/ | / ____ \ | |____ +// \ \ | | |__| |__| \__\ |__| \________/ /__/ \__\ |_______| +// \ \ | | ________________________________________________________________ +// \ \ | | | ______________________________________________________________| +// \ \| | | | __ __ __ __ ______ _______ +// \ | | |_____ | | | | | | | | | _ \ / _____) +// \ | | _____| | | | | | | | | | | \ \ \_______ +// \ | | | | |_____ | \_/ | | | | |_/ / _____ | +// \ _____| |__| |________| \_______/ |__| |______/ (_______/ +// +// This file is part of VirtualFluids. VirtualFluids is free software: you can +// redistribute it and/or modify it under the terms of the GNU General Public +// License as published by the Free Software Foundation, either version 3 of +// the License, or (at your option) any later version. +// +// VirtualFluids is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// for more details. +// +// You should have received a copy of the GNU General Public License along +// with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>. +// +//! \file PressBCs27.cu +//! \ingroup GPU +//! 
\author Martin Schoenherr, Anna Wellmann +//====================================================================================== #include "LBM/LB.h" #include "lbm/constants/D3Q27.h" #include "lbm/constants/NumericConstants.h" #include "lbm/MacroscopicQuantities.h" -#include "Kernel/Utilities/DistributionHelper.cuh" - -#include "KernelUtilities.h" +#include "LBM/GPUHelperFunctions/KernelUtilities.h" using namespace vf::lbm::constant; using namespace vf::lbm::dir; +using namespace vf::gpu; //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -__global__ void QInflowScaleByPressDevice27( real* rhoBC, - real* DD, - int* k_Q, - int* k_N, - int numberOfBCnodes, - real om1, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - unsigned long long numberOfLBnodes, - bool isEvenTimestep) +__global__ void QInflowScaleByPressDevice27( + real* rhoBC, + real* DD, + int* k_Q, + int* k_N, + int numberOfBCnodes, + real om1, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + unsigned long long numberOfLBnodes, + bool isEvenTimestep) { //////////////////////////////////////////////////////////////////////////////// const unsigned x = threadIdx.x; // Globaler x-Index @@ -468,17 +499,18 @@ __global__ void QInflowScaleByPressDevice27( real* rhoBC, //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -__global__ void QPressDeviceIncompNEQ27( real* rhoBC, - real* DD, - int* k_Q, - int* k_N, - int numberOfBCnodes, - real om1, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - unsigned long long numberOfLBnodes, - bool isEvenTimestep) +__global__ void QPressDeviceIncompNEQ27( + real* rhoBC, + real* DD, + int* k_Q, + int* k_N, + int numberOfBCnodes, + real om1, + unsigned int* neighborX, + unsigned int* 
neighborY, + unsigned int* neighborZ, + unsigned long long numberOfLBnodes, + bool isEvenTimestep) { //////////////////////////////////////////////////////////////////////////////// const unsigned x = threadIdx.x; // Globaler x-Index @@ -807,54 +839,49 @@ __global__ void QPressDeviceIncompNEQ27( real* rhoBC, //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -__global__ void QPressDeviceNEQ27(real* rhoBC, - real* distribution, - int* bcNodeIndices, - int* bcNeighborIndices, - int numberOfBCnodes, - real omega1, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - unsigned long long numberOfLBnodes, - bool isEvenTimestep) +__global__ void QPressDeviceNEQ27( + real* rhoBC, + real* distributions, + int* bcNodeIndices, + int* bcNeighborIndices, + int numberOfBCnodes, + real omega1, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + unsigned long long numberOfLBnodes, + bool isEvenTimestep) { - ////////////////////////////////////////////////////////////////////////// + //////////////////////////////////////////////////////////////////////////////// //! The pressure boundary condition is executed in the following steps //! + //////////////////////////////////////////////////////////////////////////////// //! - Get node index coordinates from threadIdx, blockIdx, blockDim and gridDim. //! - const unsigned x = threadIdx.x; // global x-index - const unsigned y = blockIdx.x; // global y-index - const unsigned z = blockIdx.y; // global z-index - - const unsigned nx = blockDim.x; - const unsigned ny = gridDim.x; + const unsigned nodeIndex = getNodeIndex(); - const unsigned k = nx*(ny*z + y) + x; - - ////////////////////////////////////////////////////////////////////////// + //////////////////////////////////////////////////////////////////////////////// //! 
- Run for all indices in size of boundary condition (numberOfBCnodes) //! - if(k < numberOfBCnodes) + if(nodeIndex < numberOfBCnodes) { ////////////////////////////////////////////////////////////////////////// //! - Read distributions: style of reading and writing the distributions from/to stored arrays dependent on timestep is based on the esoteric twist algorithm \ref //! <a href="https://doi.org/10.3390/computation5020019"><b>[ M. Geier et al. (2017), DOI:10.3390/computation5020019 ]</b></a> //! Distributions27 dist; - getPointersToDistributions(dist, distribution, numberOfLBnodes, isEvenTimestep); + getPointersToDistributions(dist, distributions, numberOfLBnodes, isEvenTimestep); //////////////////////////////////////////////////////////////////////////////// //! - Set local pressure //! - real rhoBClocal = rhoBC[k]; + real rhoBClocal = rhoBC[nodeIndex]; //////////////////////////////////////////////////////////////////////////////// //! - Set neighbor indices (necessary for indirect addressing) //! - unsigned int KQK = bcNodeIndices[k]; + unsigned int KQK = bcNodeIndices[nodeIndex]; unsigned int kzero= KQK; unsigned int ke = KQK; unsigned int kw = neighborX[KQK]; @@ -885,7 +912,7 @@ __global__ void QPressDeviceNEQ27(real* rhoBC, //////////////////////////////////////////////////////////////////////////////// //! - Set neighbor indices (necessary for indirect addressing) for neighboring node //! 
- unsigned int K1QK = bcNeighborIndices[k]; + unsigned int K1QK = bcNeighborIndices[nodeIndex]; unsigned int k1zero= K1QK; unsigned int k1e = K1QK; unsigned int k1w = neighborX[K1QK]; @@ -1110,16 +1137,17 @@ __global__ void QPressDeviceNEQ27(real* rhoBC, //////////////////////////////////////////////////////////////////////////////// -__global__ void LB_BC_Press_East27( int nx, - int ny, - int tz, - unsigned int* bcMatD, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - real* DD, - unsigned long long numberOfLBnodes, - bool isEvenTimestep) +__global__ void LB_BC_Press_East27( + int nx, + int ny, + int tz, + unsigned int* bcMatD, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + real* DD, + unsigned long long numberOfLBnodes, + bool isEvenTimestep) { //thread-index int ty = blockIdx.x; @@ -1419,17 +1447,18 @@ __global__ void LB_BC_Press_East27( int nx, ////////////////////////////////////////////////////////////////////////////// -__global__ void QPressDevice27(real* rhoBC, - real* DD, - int* k_Q, - real* QQ, - unsigned int numberOfBCnodes, - real om1, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - unsigned long long numberOfLBnodes, - bool isEvenTimestep) +__global__ void QPressDevice27( + real* rhoBC, + real* DD, + int* k_Q, + real* QQ, + unsigned int numberOfBCnodes, + real om1, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + unsigned long long numberOfLBnodes, + bool isEvenTimestep) { Distributions27 D; if (isEvenTimestep==true) @@ -1902,20 +1931,21 @@ __global__ void QPressDevice27(real* rhoBC, ////////////////////////////////////////////////////////////////////////////// -__global__ void QPressDeviceAntiBB27( real* rhoBC, - real* vx, - real* vy, - real* vz, - real* DD, - int* k_Q, - real* QQ, - int numberOfBCnodes, - real om1, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - unsigned long 
long numberOfLBnodes, - bool isEvenTimestep) +__global__ void QPressDeviceAntiBB27( + real* rhoBC, + real* vx, + real* vy, + real* vz, + real* DD, + int* k_Q, + real* QQ, + int numberOfBCnodes, + real om1, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + unsigned long long numberOfLBnodes, + bool isEvenTimestep) { Distributions27 D; if (isEvenTimestep==true) @@ -2367,16 +2397,17 @@ __global__ void QPressDeviceAntiBB27( real* rhoBC, ////////////////////////////////////////////////////////////////////////////// -__global__ void QPressDeviceFixBackflow27( real* rhoBC, - real* DD, - int* k_Q, - int numberOfBCnodes, - real om1, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - unsigned long long numberOfLBnodes, - bool isEvenTimestep) +__global__ void QPressDeviceFixBackflow27( + real* rhoBC, + real* DD, + int* k_Q, + int numberOfBCnodes, + real om1, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + unsigned long long numberOfLBnodes, + bool isEvenTimestep) { //////////////////////////////////////////////////////////////////////////////// const unsigned x = threadIdx.x; // Globaler x-Index @@ -2558,16 +2589,17 @@ __global__ void QPressDeviceFixBackflow27( real* rhoBC, ////////////////////////////////////////////////////////////////////////////// -__global__ void QPressDeviceDirDepBot27( real* rhoBC, - real* DD, - int* k_Q, - int numberOfBCnodes, - real om1, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - unsigned long long numberOfLBnodes, - bool isEvenTimestep) +__global__ void QPressDeviceDirDepBot27( + real* rhoBC, + real* DD, + int* k_Q, + int numberOfBCnodes, + real om1, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + unsigned long long numberOfLBnodes, + bool isEvenTimestep) { //////////////////////////////////////////////////////////////////////////////// const unsigned x = threadIdx.x; // 
Globaler x-Index @@ -2802,30 +2834,32 @@ __host__ __device__ real computeOutflowDistribution(const real* const &f, const } //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -__global__ void QPressNoRhoDevice27( real* rhoBC, - real* distributions, - int* k_Q, - int* k_N, - int numberOfBCnodes, - real om1, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - unsigned long long numberOfLBnodes, - bool isEvenTimestep, - int direction) +__global__ void QPressNoRhoDevice27( + real* rhoBC, + real* distributions, + int* k_Q, + int* k_N, + int numberOfBCnodes, + real om1, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + unsigned long long numberOfLBnodes, + bool isEvenTimestep, + int direction) { //////////////////////////////////////////////////////////////////////////////// + //! - Get the node index coordinates from threadIdx, blockIdx, blockDim and gridDim. + //! 
+ const unsigned nodeIndex = getNodeIndex(); - - const unsigned k = vf::gpu::getNodeIndex(); ////////////////////////////////////////////////////////////////////////// - if(k>=numberOfBCnodes) return; + if(nodeIndex >= numberOfBCnodes) return; //////////////////////////////////////////////////////////////////////////////// //index - unsigned int KQK = k_Q[k]; + unsigned int KQK = k_Q[nodeIndex]; // unsigned int kzero= KQK; unsigned int ke = KQK; unsigned int kw = neighborX[KQK]; @@ -2855,7 +2889,7 @@ __global__ void QPressNoRhoDevice27( real* rhoBC, unsigned int kbsw = neighborZ[ksw]; //////////////////////////////////////////////////////////////////////////////// //index1 - unsigned int K1QK = k_N[k]; + unsigned int K1QK = k_N[nodeIndex]; //unsigned int k1zero= K1QK; unsigned int k1e = K1QK; unsigned int k1w = neighborX[K1QK]; @@ -3027,38 +3061,76 @@ __global__ void QPressNoRhoDevice27( real* rhoBC, break; } } - //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// __host__ __device__ real computeOutflowDistribution(const real* const &f, const real* const &f1, const int dir, const real rhoCorrection, const real cs, const real weight) { return f1[dir ] * cs + (c1o1 - cs) * f[dir ] - weight *rhoCorrection; } -__global__ void QPressZeroRhoOutflowDevice27( real* rhoBC, - real* distributions, - int* k_Q, - int* k_N, - int numberOfBCnodes, - real om1, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - unsigned long long numberOfLBnodes, - bool isEvenTimestep, - int direction, - real densityCorrectionFactor) +__global__ void QPressZeroRhoOutflowDevice27( + real* rhoBC, + real* distributions, + int* k_Q, + int* k_N, + int 
numberOfBCnodes, + real om1, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + unsigned long long numberOfLBnodes, + bool isEvenTimestep, + int direction, + real densityCorrectionFactor) { //////////////////////////////////////////////////////////////////////////////// - const unsigned k = vf::gpu::getNodeIndex(); + //! - Get the node index coordinates from threadIdx, blockIdx, blockDim and gridDim. + //! + const unsigned nodeIndex = getNodeIndex(); ////////////////////////////////////////////////////////////////////////// - if(k>=numberOfBCnodes) return; + if( nodeIndex >= numberOfBCnodes ) return; + //////////////////////////////////////////////////////////////////////////////// //index - uint k_000 = k_Q[k]; + uint k_000 = k_Q[nodeIndex]; uint k_M00 = neighborX[k_000]; uint k_0M0 = neighborY[k_000]; uint k_00M = neighborZ[k_000]; @@ -3069,7 +3141,7 @@ __global__ void QPressZeroRhoOutflowDevice27( real* rhoBC, //////////////////////////////////////////////////////////////////////////////// //index of neighbor - uint kN_000 = k_N[k]; + uint kN_000 = k_N[nodeIndex]; uint kN_M00 = neighborX[k_000]; uint kN_0M0 = neighborY[k_000]; uint kN_00M = neighborZ[k_000]; @@ -3255,17 +3327,18 @@ __global__ void QPressZeroRhoOutflowDevice27( real* rhoBC, //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -__global__ void QPressDeviceOld27(real* rhoBC, - real* DD, - int* k_Q, - int* k_N, - int numberOfBCnodes, - real om1, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - unsigned long long numberOfLBnodes, - bool isEvenTimestep) +__global__ void QPressDeviceOld27( + real* rhoBC, + real* DD, + int* k_Q, + int* k_N, + int numberOfBCnodes, + real om1, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + unsigned long long numberOfLBnodes, + bool isEvenTimestep) { 
//////////////////////////////////////////////////////////////////////////////// const unsigned x = threadIdx.x; // Globaler x-Index @@ -3514,18 +3587,19 @@ __global__ void QPressDeviceOld27(real* rhoBC, //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -__global__ void QPressDeviceEQZ27(real* rhoBC, - real* DD, - int* k_Q, - int* k_N, - real* kTestRE, - int numberOfBCnodes, - real om1, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - unsigned long long numberOfLBnodes, - bool isEvenTimestep) +__global__ void QPressDeviceEQZ27( + real* rhoBC, + real* DD, + int* k_Q, + int* k_N, + real* kTestRE, + int numberOfBCnodes, + real om1, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + unsigned long long numberOfLBnodes, + bool isEvenTimestep) { //////////////////////////////////////////////////////////////////////////////// const unsigned x = threadIdx.x; // Globaler x-Index @@ -4295,14 +4369,15 @@ __global__ void QPressDeviceEQZ27(real* rhoBC, //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -__global__ void QPressDeviceZero27( real* DD, - int* k_Q, - unsigned int numberOfBCnodes, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - unsigned long long numberOfLBnodes, - bool isEvenTimestep) +__global__ void QPressDeviceZero27( + real* DD, + int* k_Q, + unsigned int numberOfBCnodes, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + unsigned long long numberOfLBnodes, + bool isEvenTimestep) { //////////////////////////////////////////////////////////////////////////////// const unsigned x = threadIdx.x; // Globaler x-Index @@ -4482,17 +4557,18 @@ __global__ void QPressDeviceZero27( real* DD, 
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -__global__ void QPressDeviceFake27( real* rhoBC, - real* DD, - int* k_Q, - int* k_N, - int numberOfBCnodes, - real om1, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - unsigned long long numberOfLBnodes, - bool isEvenTimestep) +__global__ void QPressDeviceFake27( + real* rhoBC, + real* DD, + int* k_Q, + int* k_N, + int numberOfBCnodes, + real om1, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + unsigned long long numberOfLBnodes, + bool isEvenTimestep) { //////////////////////////////////////////////////////////////////////////////// const unsigned x = threadIdx.x; // Globaler x-Index @@ -4756,17 +4832,18 @@ __global__ void QPressDeviceFake27( real* rhoBC, ////////////////////////////////////////////////////////////////////////// -__global__ void QPressDevice27_IntBB(real* rho, - real* DD, - int* k_Q, - real* QQ, - unsigned int numberOfBCnodes, - real om1, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - unsigned long long numberOfLBnodes, - bool isEvenTimestep) +__global__ void QPressDevice27_IntBB( + real* rho, + real* DD, + int* k_Q, + real* QQ, + unsigned int numberOfBCnodes, + real om1, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + unsigned long long numberOfLBnodes, + bool isEvenTimestep) { Distributions27 D; if (isEvenTimestep==true) diff --git a/src/gpu/VirtualFluids_GPU/GPU/SlipBCs27.cu b/src/gpu/VirtualFluids_GPU/GPU/SlipBCs27.cu index d0f272b4aa7b8ce6477efbe71c6f3ba8f48c6aaf..cc8ca53d15ac02686b850a70ab181bb47285a7d1 100644 --- a/src/gpu/VirtualFluids_GPU/GPU/SlipBCs27.cu +++ b/src/gpu/VirtualFluids_GPU/GPU/SlipBCs27.cu @@ -1,23 +1,56 @@ -/* Device code */ +//======================================================================================= +// ____ ____ __ ______ 
__________ __ __ __ __ +// \ \ | | | | | _ \ |___ ___| | | | | / \ | | +// \ \ | | | | | |_) | | | | | | | / \ | | +// \ \ | | | | | _ / | | | | | | / /\ \ | | +// \ \ | | | | | | \ \ | | | \__/ | / ____ \ | |____ +// \ \ | | |__| |__| \__\ |__| \________/ /__/ \__\ |_______| +// \ \ | | ________________________________________________________________ +// \ \ | | | ______________________________________________________________| +// \ \| | | | __ __ __ __ ______ _______ +// \ | | |_____ | | | | | | | | | _ \ / _____) +// \ | | _____| | | | | | | | | | | \ \ \_______ +// \ | | | | |_____ | \_/ | | | | |_/ / _____ | +// \ _____| |__| |________| \_______/ |__| |______/ (_______/ +// +// This file is part of VirtualFluids. VirtualFluids is free software: you can +// redistribute it and/or modify it under the terms of the GNU General Public +// License as published by the Free Software Foundation, either version 3 of +// the License, or (at your option) any later version. +// +// VirtualFluids is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// for more details. +// +// You should have received a copy of the GNU General Public License along +// with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>. +// +//! \file SlipBCs27.cu +//! \ingroup GPU +//! 
\author Martin Schoenherr, Anna Wellmann +//====================================================================================== #include "LBM/LB.h" #include "lbm/constants/D3Q27.h" #include "lbm/constants/NumericConstants.h" -#include "KernelUtilities.h" +#include "LBM/GPUHelperFunctions/KernelUtilities.h" using namespace vf::lbm::constant; using namespace vf::lbm::dir; +using namespace vf::gpu; ////////////////////////////////////////////////////////////////////////////// -__global__ void QSlipDevice27(real* DD, - int* k_Q, - real* QQ, - unsigned int numberOfBCnodes, - real om1, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - unsigned long long numberOfLBnodes, - bool isEvenTimestep) +__global__ void QSlipDevice27( + real* DD, + int* k_Q, + real* QQ, + unsigned int numberOfBCnodes, + real om1, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + unsigned long long numberOfLBnodes, + bool isEvenTimestep) { Distributions27 D; if (isEvenTimestep==true) @@ -659,32 +692,26 @@ __global__ void QSlipDevice27(real* DD, ////////////////////////////////////////////////////////////////////////////// __global__ void QSlipDeviceComp27( - real* distributions, - int* subgridDistanceIndices, - real* subgridDistances, - unsigned int numberOfBCnodes, - real omega, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - unsigned long long numberOfLBnodes, - bool isEvenTimestep) + real* distributions, + int* subgridDistanceIndices, + real* subgridDistances, + unsigned int numberOfBCnodes, + real omega, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + unsigned long long numberOfLBnodes, + bool isEvenTimestep) { //! The slip boundary condition is executed in the following steps //! + //////////////////////////////////////////////////////////////////////////////// //! - Get node index coordinates from threadIdx, blockIdx, blockDim and gridDim. //! 
- const unsigned x = threadIdx.x; // global x-index - const unsigned y = blockIdx.x; // global y-index - const unsigned z = blockIdx.y; // global z-index - - const unsigned nx = blockDim.x; - const unsigned ny = gridDim.x; + const unsigned nodeIndex = getNodeIndex(); - const unsigned k = nx*(ny*z + y) + x; - - if(k < numberOfBCnodes) + if(nodeIndex < numberOfBCnodes) { ////////////////////////////////////////////////////////////////////////// //! - Read distributions: style of reading and writing the distributions from/to stored arrays dependent on timestep is based on the esoteric twist algorithm \ref @@ -702,7 +729,7 @@ __global__ void QSlipDeviceComp27( //////////////////////////////////////////////////////////////////////////////// //! - Set neighbor indices (necessary for indirect addressing) //! - unsigned int indexOfBCnode = subgridDistanceIndices[k]; + unsigned int indexOfBCnode = subgridDistanceIndices[nodeIndex]; unsigned int kzero= indexOfBCnode; unsigned int ke = indexOfBCnode; unsigned int kw = neighborX[indexOfBCnode]; @@ -804,7 +831,7 @@ __global__ void QSlipDeviceComp27( bool y = false; bool z = false; - q = (subgridD.q[DIR_P00])[k]; + q = (subgridD.q[DIR_P00])[nodeIndex]; if (q>=c0o1 && q<=c1o1) // only update distribution for q between zero and one { VeloX = c0o1; @@ -816,7 +843,7 @@ __global__ void QSlipDeviceComp27( (dist.f[DIR_M00])[kw] = getInterpolatedDistributionForVeloBC(q, f_E, f_W, feq, omega, velocityBC, c2o27); } - q = (subgridD.q[DIR_M00])[k]; + q = (subgridD.q[DIR_M00])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { VeloX = c0o1; @@ -828,7 +855,7 @@ __global__ void QSlipDeviceComp27( (dist.f[DIR_P00])[ke] = getInterpolatedDistributionForVeloBC(q, f_W, f_E, feq, omega, velocityBC, c2o27); } - q = (subgridD.q[DIR_0P0])[k]; + q = (subgridD.q[DIR_0P0])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { VeloY = c0o1; @@ -840,7 +867,7 @@ __global__ void QSlipDeviceComp27( (dist.f[DIR_0M0])[ks] = getInterpolatedDistributionForVeloBC(q, f_N, f_S, feq, omega, 
velocityBC, c2o27); } - q = (subgridD.q[DIR_0M0])[k]; + q = (subgridD.q[DIR_0M0])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { VeloY = c0o1; @@ -852,7 +879,7 @@ __global__ void QSlipDeviceComp27( (dist.f[DIR_0P0])[kn] = getInterpolatedDistributionForVeloBC(q, f_S, f_N, feq, omega, velocityBC, c2o27); } - q = (subgridD.q[DIR_00P])[k]; + q = (subgridD.q[DIR_00P])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { VeloZ = c0o1; @@ -864,7 +891,7 @@ __global__ void QSlipDeviceComp27( (dist.f[DIR_00M])[kb] = getInterpolatedDistributionForVeloBC(q, f_T, f_B, feq, omega, velocityBC, c2o27); } - q = (subgridD.q[DIR_00M])[k]; + q = (subgridD.q[DIR_00M])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { VeloZ = c0o1; @@ -876,7 +903,7 @@ __global__ void QSlipDeviceComp27( (dist.f[DIR_00P])[kt] = getInterpolatedDistributionForVeloBC(q, f_B, f_T, feq, omega, velocityBC, c2o27); } - q = (subgridD.q[DIR_PP0])[k]; + q = (subgridD.q[DIR_PP0])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { VeloX = slipLength*vx1; @@ -890,7 +917,7 @@ __global__ void QSlipDeviceComp27( (dist.f[DIR_MM0])[ksw] = getInterpolatedDistributionForVeloBC(q, f_NE, f_SW, feq, omega, velocityBC, c1o54); } - q = (subgridD.q[DIR_MM0])[k]; + q = (subgridD.q[DIR_MM0])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { VeloX = slipLength*vx1; @@ -904,7 +931,7 @@ __global__ void QSlipDeviceComp27( (dist.f[DIR_PP0])[kne] = getInterpolatedDistributionForVeloBC(q, f_SW, f_NE, feq, omega, velocityBC, c1o54); } - q = (subgridD.q[DIR_PM0])[k]; + q = (subgridD.q[DIR_PM0])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { VeloX = slipLength*vx1; @@ -918,7 +945,7 @@ __global__ void QSlipDeviceComp27( (dist.f[DIR_MP0])[knw] = getInterpolatedDistributionForVeloBC(q, f_SE, f_NW, feq, omega, velocityBC, c1o54); } - q = (subgridD.q[DIR_MP0])[k]; + q = (subgridD.q[DIR_MP0])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { VeloX = slipLength*vx1; @@ -932,7 +959,7 @@ __global__ void QSlipDeviceComp27( (dist.f[DIR_PM0])[kse] = getInterpolatedDistributionForVeloBC(q, f_NW, f_SE, feq, omega, velocityBC, c1o54); } 
- q = (subgridD.q[DIR_P0P])[k]; + q = (subgridD.q[DIR_P0P])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { VeloX = slipLength*vx1; @@ -946,7 +973,7 @@ __global__ void QSlipDeviceComp27( (dist.f[DIR_M0M])[kbw] = getInterpolatedDistributionForVeloBC(q, f_TE, f_BW, feq, omega, velocityBC, c1o54); } - q = (subgridD.q[DIR_M0M])[k]; + q = (subgridD.q[DIR_M0M])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { VeloX = slipLength*vx1; @@ -955,12 +982,12 @@ __global__ void QSlipDeviceComp27( if (z == true) VeloZ = c0o1; velocityLB = -vx1 - vx3; - feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o54); + feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o54); velocityBC = -VeloX - VeloZ; (dist.f[DIR_P0P])[kte] = getInterpolatedDistributionForVeloBC(q, f_BW, f_TE, feq, omega, velocityBC, c1o54); } - q = (subgridD.q[DIR_P0M])[k]; + q = (subgridD.q[DIR_P0M])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { VeloX = slipLength*vx1; @@ -974,7 +1001,7 @@ __global__ void QSlipDeviceComp27( (dist.f[DIR_M0P])[ktw] = getInterpolatedDistributionForVeloBC(q, f_BE, f_TW, feq, omega, velocityBC, c1o54); } - q = (subgridD.q[DIR_M0P])[k]; + q = (subgridD.q[DIR_M0P])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { VeloX = slipLength*vx1; @@ -988,7 +1015,7 @@ __global__ void QSlipDeviceComp27( (dist.f[DIR_P0M])[kbe] = getInterpolatedDistributionForVeloBC(q, f_TW, f_BE, feq, omega, velocityBC, c1o54); } - q = (subgridD.q[DIR_0PP])[k]; + q = (subgridD.q[DIR_0PP])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { VeloY = slipLength*vx2; @@ -1002,7 +1029,7 @@ __global__ void QSlipDeviceComp27( (dist.f[DIR_0MM])[kbs] = getInterpolatedDistributionForVeloBC(q, f_TN, f_BS, feq, omega, velocityBC, c1o54); } - q = (subgridD.q[DIR_0MM])[k]; + q = (subgridD.q[DIR_0MM])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { VeloY = slipLength*vx2; @@ -1017,7 +1044,7 @@ __global__ void QSlipDeviceComp27( } - q = (subgridD.q[DIR_0PM])[k]; + q = (subgridD.q[DIR_0PM])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { VeloY = slipLength*vx2; @@ -1031,7 +1058,7 @@ __global__ void 
QSlipDeviceComp27( (dist.f[DIR_0MP])[kts] = getInterpolatedDistributionForVeloBC(q, f_BN, f_TS, feq, omega, velocityBC, c1o54); } - q = (subgridD.q[DIR_0MP])[k]; + q = (subgridD.q[DIR_0MP])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { VeloY = slipLength*vx2; @@ -1045,7 +1072,7 @@ __global__ void QSlipDeviceComp27( (dist.f[DIR_0PM])[kbn] = getInterpolatedDistributionForVeloBC(q, f_TS, f_BN, feq, omega, velocityBC, c1o54); } - q = (subgridD.q[DIR_PPP])[k]; + q = (subgridD.q[DIR_PPP])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { VeloX = slipLength*vx1; @@ -1060,7 +1087,7 @@ __global__ void QSlipDeviceComp27( (dist.f[DIR_MMM])[kbsw] = getInterpolatedDistributionForVeloBC(q, f_TNE, f_BSW, feq, omega, velocityBC, c1o216); } - q = (subgridD.q[DIR_MMM])[k]; + q = (subgridD.q[DIR_MMM])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { VeloX = slipLength*vx1; @@ -1076,7 +1103,7 @@ __global__ void QSlipDeviceComp27( } - q = (subgridD.q[DIR_PPM])[k]; + q = (subgridD.q[DIR_PPM])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { VeloX = slipLength*vx1; @@ -1091,7 +1118,7 @@ __global__ void QSlipDeviceComp27( (dist.f[DIR_MMP])[ktsw] = getInterpolatedDistributionForVeloBC(q, f_BNE, f_TSW, feq, omega, velocityBC, c1o216); } - q = (subgridD.q[DIR_MMP])[k]; + q = (subgridD.q[DIR_MMP])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { VeloX = slipLength*vx1; @@ -1106,7 +1133,7 @@ __global__ void QSlipDeviceComp27( (dist.f[DIR_PPM])[kbne] = getInterpolatedDistributionForVeloBC(q, f_TSW, f_BNE, feq, omega, velocityBC, c1o216); } - q = (subgridD.q[DIR_PMP])[k]; + q = (subgridD.q[DIR_PMP])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { VeloX = slipLength*vx1; @@ -1121,7 +1148,7 @@ __global__ void QSlipDeviceComp27( (dist.f[DIR_MPM])[kbnw] = getInterpolatedDistributionForVeloBC(q, f_TSE, f_BNW, feq, omega, velocityBC, c1o216); } - q = (subgridD.q[DIR_MPM])[k]; + q = (subgridD.q[DIR_MPM])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { VeloX = slipLength*vx1; @@ -1136,7 +1163,7 @@ __global__ void QSlipDeviceComp27( (dist.f[DIR_PMP])[ktse] = 
getInterpolatedDistributionForVeloBC(q, f_BNW, f_TSE, feq, omega, velocityBC, c1o216); } - q = (subgridD.q[DIR_PMM])[k]; + q = (subgridD.q[DIR_PMM])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { VeloX = slipLength*vx1; @@ -1151,7 +1178,7 @@ __global__ void QSlipDeviceComp27( (dist.f[DIR_MPP])[ktnw] = getInterpolatedDistributionForVeloBC(q, f_BSE, f_TNW, feq, omega, velocityBC, c1o216); } - q = (subgridD.q[DIR_MPP])[k]; + q = (subgridD.q[DIR_MPP])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { VeloX = slipLength*vx1; @@ -1169,34 +1196,53 @@ __global__ void QSlipDeviceComp27( } //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + + + + + + + + + + + + + + + + + + + + + + + + + + ////////////////////////////////////////////////////////////////////////////// __global__ void BBSlipDeviceComp27( - real* distributions, - int* subgridDistanceIndices, - real* subgridDistances, - unsigned int numberOfBCnodes, - real omega, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - unsigned long long numberOfLBnodes, - bool isEvenTimestep) + real* distributions, + int* subgridDistanceIndices, + real* subgridDistances, + unsigned int numberOfBCnodes, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + unsigned long long numberOfLBnodes, + bool isEvenTimestep) { //! The slip boundary condition is executed in the following steps //! + //////////////////////////////////////////////////////////////////////////////// //! - Get node index coordinates from threadIdx, blockIdx, blockDim and gridDim. //! 
- const unsigned x = threadIdx.x; // global x-index - const unsigned y = blockIdx.x; // global y-index - const unsigned z = blockIdx.y; // global z-index - - const unsigned nx = blockDim.x; - const unsigned ny = gridDim.x; - - const unsigned k = nx*(ny*z + y) + x; + const unsigned nodeIndex = getNodeIndex(); - if(k < numberOfBCnodes) + if(nodeIndex < numberOfBCnodes) { ////////////////////////////////////////////////////////////////////////// //! - Read distributions: style of reading and writing the distributions from/to stored arrays dependent on timestep is based on the esoteric twist algorithm \ref @@ -1214,7 +1260,7 @@ __global__ void BBSlipDeviceComp27( //////////////////////////////////////////////////////////////////////////////// //! - Set neighbor indices (necessary for indirect addressing) //! - unsigned int indexOfBCnode = subgridDistanceIndices[k]; + unsigned int indexOfBCnode = subgridDistanceIndices[nodeIndex]; unsigned int kzero= indexOfBCnode; unsigned int ke = indexOfBCnode; unsigned int kw = neighborX[indexOfBCnode]; @@ -1316,7 +1362,7 @@ __global__ void BBSlipDeviceComp27( bool y = false; bool z = false; - q = (subgridD.q[DIR_P00])[k]; + q = (subgridD.q[DIR_P00])[nodeIndex]; if (q>=c0o1 && q<=c1o1) // only update distribution for q between zero and one { VeloX = c0o1; @@ -1326,7 +1372,7 @@ __global__ void BBSlipDeviceComp27( (dist.f[DIR_M00])[kw] = getBounceBackDistributionForVeloBC(f_W, velocityBC, c2o27); } - q = (subgridD.q[DIR_M00])[k]; + q = (subgridD.q[DIR_M00])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { VeloX = c0o1; @@ -1336,7 +1382,7 @@ __global__ void BBSlipDeviceComp27( (dist.f[DIR_P00])[ke] = getBounceBackDistributionForVeloBC(f_E, velocityBC, c2o27); } - q = (subgridD.q[DIR_0P0])[k]; + q = (subgridD.q[DIR_0P0])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { VeloY = c0o1; @@ -1346,7 +1392,7 @@ __global__ void BBSlipDeviceComp27( (dist.f[DIR_0M0])[ks] = getBounceBackDistributionForVeloBC(f_S, velocityBC, c2o27); } - q = (subgridD.q[DIR_0M0])[k]; + 
q = (subgridD.q[DIR_0M0])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { VeloY = c0o1; @@ -1356,7 +1402,7 @@ __global__ void BBSlipDeviceComp27( (dist.f[DIR_0P0])[kn] = getBounceBackDistributionForVeloBC(f_N, velocityBC, c2o27); } - q = (subgridD.q[DIR_00P])[k]; + q = (subgridD.q[DIR_00P])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { VeloZ = c0o1; @@ -1366,7 +1412,7 @@ __global__ void BBSlipDeviceComp27( (dist.f[DIR_00M])[kb] = getBounceBackDistributionForVeloBC(f_B, velocityBC, c2o27); } - q = (subgridD.q[DIR_00M])[k]; + q = (subgridD.q[DIR_00M])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { VeloZ = c0o1; @@ -1376,7 +1422,7 @@ __global__ void BBSlipDeviceComp27( (dist.f[DIR_00P])[kt] = getBounceBackDistributionForVeloBC(f_T, velocityBC, c2o27); } - q = (subgridD.q[DIR_PP0])[k]; + q = (subgridD.q[DIR_PP0])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { VeloX = slipLength*vx1; @@ -1388,7 +1434,7 @@ __global__ void BBSlipDeviceComp27( (dist.f[DIR_MM0])[ksw] = getBounceBackDistributionForVeloBC(f_SW, velocityBC, c1o54); } - q = (subgridD.q[DIR_MM0])[k]; + q = (subgridD.q[DIR_MM0])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { VeloX = slipLength*vx1; @@ -1400,7 +1446,7 @@ __global__ void BBSlipDeviceComp27( (dist.f[DIR_PP0])[kne] = getBounceBackDistributionForVeloBC(f_NE, velocityBC, c1o54); } - q = (subgridD.q[DIR_PM0])[k]; + q = (subgridD.q[DIR_PM0])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { VeloX = slipLength*vx1; @@ -1412,7 +1458,7 @@ __global__ void BBSlipDeviceComp27( (dist.f[DIR_MP0])[knw] = getBounceBackDistributionForVeloBC(f_NW, velocityBC, c1o54); } - q = (subgridD.q[DIR_MP0])[k]; + q = (subgridD.q[DIR_MP0])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { VeloX = slipLength*vx1; @@ -1424,7 +1470,7 @@ __global__ void BBSlipDeviceComp27( (dist.f[DIR_PM0])[kse] = getBounceBackDistributionForVeloBC(f_SE, velocityBC, c1o54); } - q = (subgridD.q[DIR_P0P])[k]; + q = (subgridD.q[DIR_P0P])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { VeloX = slipLength*vx1; @@ -1436,7 +1482,7 @@ __global__ void BBSlipDeviceComp27( 
(dist.f[DIR_M0M])[kbw] = getBounceBackDistributionForVeloBC(f_BW, velocityBC, c1o54); } - q = (subgridD.q[DIR_M0M])[k]; + q = (subgridD.q[DIR_M0M])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { VeloX = slipLength*vx1; @@ -1444,11 +1490,11 @@ __global__ void BBSlipDeviceComp27( if (x == true) VeloX = c0o1; if (z == true) VeloZ = c0o1; - velocityBC = -VeloX - VeloZ; - (dist.f[DIR_P0P])[kte] = getBounceBackDistributionForVeloBC(f_TE, velocityBC, c1o54); + velocityBC = -VeloX - VeloZ; + (dist.f[DIR_P0P])[kte] = getBounceBackDistributionForVeloBC(f_TE, velocityBC, c1o54); } - q = (subgridD.q[DIR_P0M])[k]; + q = (subgridD.q[DIR_P0M])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { VeloX = slipLength*vx1; @@ -1460,7 +1506,7 @@ __global__ void BBSlipDeviceComp27( (dist.f[DIR_M0P])[ktw] = getBounceBackDistributionForVeloBC(f_TW, velocityBC, c1o54); } - q = (subgridD.q[DIR_M0P])[k]; + q = (subgridD.q[DIR_M0P])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { VeloX = slipLength*vx1; @@ -1472,7 +1518,7 @@ __global__ void BBSlipDeviceComp27( (dist.f[DIR_P0M])[kbe] = getBounceBackDistributionForVeloBC(f_BE, velocityBC, c1o54); } - q = (subgridD.q[DIR_0PP])[k]; + q = (subgridD.q[DIR_0PP])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { VeloY = slipLength*vx2; @@ -1484,7 +1530,7 @@ __global__ void BBSlipDeviceComp27( (dist.f[DIR_0MM])[kbs] = getBounceBackDistributionForVeloBC(f_BS, velocityBC, c1o54); } - q = (subgridD.q[DIR_0MM])[k]; + q = (subgridD.q[DIR_0MM])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { VeloY = slipLength*vx2; @@ -1497,7 +1543,7 @@ __global__ void BBSlipDeviceComp27( } - q = (subgridD.q[DIR_0PM])[k]; + q = (subgridD.q[DIR_0PM])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { VeloY = slipLength*vx2; @@ -1509,7 +1555,7 @@ __global__ void BBSlipDeviceComp27( (dist.f[DIR_0MP])[kts] = getBounceBackDistributionForVeloBC(f_TS, velocityBC, c1o54); } - q = (subgridD.q[DIR_0MP])[k]; + q = (subgridD.q[DIR_0MP])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { VeloY = slipLength*vx2; @@ -1521,7 +1567,7 @@ __global__ void 
BBSlipDeviceComp27( (dist.f[DIR_0PM])[kbn] = getBounceBackDistributionForVeloBC(f_BN, velocityBC, c1o54); } - q = (subgridD.q[DIR_PPP])[k]; + q = (subgridD.q[DIR_PPP])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { VeloX = slipLength*vx1; @@ -1535,7 +1581,7 @@ __global__ void BBSlipDeviceComp27( (dist.f[DIR_MMM])[kbsw] = getBounceBackDistributionForVeloBC(f_TNE, velocityBC, c1o216); } - q = (subgridD.q[DIR_MMM])[k]; + q = (subgridD.q[DIR_MMM])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { VeloX = slipLength*vx1; @@ -1550,7 +1596,7 @@ __global__ void BBSlipDeviceComp27( } - q = (subgridD.q[DIR_PPM])[k]; + q = (subgridD.q[DIR_PPM])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { VeloX = slipLength*vx1; @@ -1564,7 +1610,7 @@ __global__ void BBSlipDeviceComp27( (dist.f[DIR_MMP])[ktsw] = getBounceBackDistributionForVeloBC(f_TSW, velocityBC, c1o216); } - q = (subgridD.q[DIR_MMP])[k]; + q = (subgridD.q[DIR_MMP])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { VeloX = slipLength*vx1; @@ -1578,7 +1624,7 @@ __global__ void BBSlipDeviceComp27( (dist.f[DIR_PPM])[kbne] = getBounceBackDistributionForVeloBC(f_BNE, velocityBC, c1o216); } - q = (subgridD.q[DIR_PMP])[k]; + q = (subgridD.q[DIR_PMP])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { VeloX = slipLength*vx1; @@ -1592,7 +1638,7 @@ __global__ void BBSlipDeviceComp27( (dist.f[DIR_MPM])[kbnw] = getBounceBackDistributionForVeloBC(f_BNW, velocityBC, c1o216); } - q = (subgridD.q[DIR_MPM])[k]; + q = (subgridD.q[DIR_MPM])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { VeloX = slipLength*vx1; @@ -1606,7 +1652,7 @@ __global__ void BBSlipDeviceComp27( (dist.f[DIR_PMP])[ktse] = getBounceBackDistributionForVeloBC(f_TSE, velocityBC, c1o216); } - q = (subgridD.q[DIR_PMM])[k]; + q = (subgridD.q[DIR_PMM])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { VeloX = slipLength*vx1; @@ -1620,7 +1666,7 @@ __global__ void BBSlipDeviceComp27( (dist.f[DIR_MPP])[ktnw] = getBounceBackDistributionForVeloBC(f_TNW, velocityBC, c1o216); } - q = (subgridD.q[DIR_MPP])[k]; + q = (subgridD.q[DIR_MPP])[nodeIndex]; if (q>=c0o1 
&& q<=c1o1) { VeloX = slipLength*vx1; @@ -1638,35 +1684,55 @@ __global__ void BBSlipDeviceComp27( //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + + + + + + + + + + + + + + + + + + + + + + + + + + //////////////////////////////////////////////////////////////////////////// __global__ void QSlipDeviceComp27TurbViscosity( - real* distributions, - int* subgridDistanceIndices, - real* subgridDistances, - unsigned int numberOfBCnodes, - real omega, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - real* turbViscosity, - unsigned long long numberOfLBnodes, - bool isEvenTimestep) + real* distributions, + int* subgridDistanceIndices, + real* subgridDistances, + unsigned int numberOfBCnodes, + real omega, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + real* turbViscosity, + unsigned long long numberOfLBnodes, + bool isEvenTimestep) { //! The slip boundary condition is executed in the following steps //! + //////////////////////////////////////////////////////////////////////////////// //! - Get node index coordinates from threadIdx, blockIdx, blockDim and gridDim. //! - const unsigned x = threadIdx.x; // global x-index - const unsigned y = blockIdx.x; // global y-index - const unsigned z = blockIdx.y; // global z-index - - const unsigned nx = blockDim.x; - const unsigned ny = gridDim.x; + const unsigned nodeIndex = getNodeIndex(); - const unsigned k = nx*(ny*z + y) + x; - - if(k < numberOfBCnodes) + if(nodeIndex < numberOfBCnodes) { ////////////////////////////////////////////////////////////////////////// //! 
- Read distributions: style of reading and writing the distributions from/to stored arrays dependent on timestep is based on the esoteric twist algorithm \ref @@ -1684,7 +1750,7 @@ __global__ void QSlipDeviceComp27TurbViscosity( //////////////////////////////////////////////////////////////////////////////// //! - Set neighbor indices (necessary for indirect addressing) //! - unsigned int indexOfBCnode = subgridDistanceIndices[k]; + unsigned int indexOfBCnode = subgridDistanceIndices[nodeIndex]; unsigned int kzero= indexOfBCnode; unsigned int ke = indexOfBCnode; unsigned int kw = neighborX[indexOfBCnode]; @@ -1791,7 +1857,7 @@ __global__ void QSlipDeviceComp27TurbViscosity( bool y = false; bool z = false; - q = (subgridD.q[DIR_P00])[k]; + q = (subgridD.q[DIR_P00])[nodeIndex]; if (q>=c0o1 && q<=c1o1) // only update distribution for q between zero and one { VeloX = c0o1; @@ -1803,7 +1869,7 @@ __global__ void QSlipDeviceComp27TurbViscosity( (dist.f[DIR_M00])[kw] = getInterpolatedDistributionForVeloBC(q, f_E, f_W, feq, om_turb, velocityBC, c2o27); } - q = (subgridD.q[DIR_M00])[k]; + q = (subgridD.q[DIR_M00])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { VeloX = c0o1; @@ -1815,7 +1881,7 @@ __global__ void QSlipDeviceComp27TurbViscosity( (dist.f[DIR_P00])[ke] = getInterpolatedDistributionForVeloBC(q, f_W, f_E, feq, om_turb, velocityBC, c2o27); } - q = (subgridD.q[DIR_0P0])[k]; + q = (subgridD.q[DIR_0P0])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { VeloY = c0o1; @@ -1827,7 +1893,7 @@ __global__ void QSlipDeviceComp27TurbViscosity( (dist.f[DIR_0M0])[ks] = getInterpolatedDistributionForVeloBC(q, f_N, f_S, feq, om_turb, velocityBC, c2o27); } - q = (subgridD.q[DIR_0M0])[k]; + q = (subgridD.q[DIR_0M0])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { VeloY = c0o1; @@ -1839,7 +1905,7 @@ __global__ void QSlipDeviceComp27TurbViscosity( (dist.f[DIR_0P0])[kn] = getInterpolatedDistributionForVeloBC(q, f_S, f_N, feq, om_turb, velocityBC, c2o27); } - q = (subgridD.q[DIR_00P])[k]; + q = 
(subgridD.q[DIR_00P])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { VeloZ = c0o1; @@ -1851,7 +1917,7 @@ __global__ void QSlipDeviceComp27TurbViscosity( (dist.f[DIR_00M])[kb] = getInterpolatedDistributionForVeloBC(q, f_T, f_B, feq, om_turb, velocityBC, c2o27); } - q = (subgridD.q[DIR_00M])[k]; + q = (subgridD.q[DIR_00M])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { VeloZ = c0o1; @@ -1863,7 +1929,7 @@ __global__ void QSlipDeviceComp27TurbViscosity( (dist.f[DIR_00P])[kt] = getInterpolatedDistributionForVeloBC(q, f_B, f_T, feq, om_turb, velocityBC, c2o27); } - q = (subgridD.q[DIR_PP0])[k]; + q = (subgridD.q[DIR_PP0])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { VeloX = slipLength*vx1; @@ -1877,7 +1943,7 @@ __global__ void QSlipDeviceComp27TurbViscosity( (dist.f[DIR_MM0])[ksw] = getInterpolatedDistributionForVeloBC(q, f_NE, f_SW, feq, om_turb, velocityBC, c1o54); } - q = (subgridD.q[DIR_MM0])[k]; + q = (subgridD.q[DIR_MM0])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { VeloX = slipLength*vx1; @@ -1891,7 +1957,7 @@ __global__ void QSlipDeviceComp27TurbViscosity( (dist.f[DIR_PP0])[kne] = getInterpolatedDistributionForVeloBC(q, f_SW, f_NE, feq, om_turb, velocityBC, c1o54); } - q = (subgridD.q[DIR_PM0])[k]; + q = (subgridD.q[DIR_PM0])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { VeloX = slipLength*vx1; @@ -1905,7 +1971,7 @@ __global__ void QSlipDeviceComp27TurbViscosity( (dist.f[DIR_MP0])[knw] = getInterpolatedDistributionForVeloBC(q, f_SE, f_NW, feq, om_turb, velocityBC, c1o54); } - q = (subgridD.q[DIR_MP0])[k]; + q = (subgridD.q[DIR_MP0])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { VeloX = slipLength*vx1; @@ -1919,7 +1985,7 @@ __global__ void QSlipDeviceComp27TurbViscosity( (dist.f[DIR_PM0])[kse] = getInterpolatedDistributionForVeloBC(q, f_NW, f_SE, feq, om_turb, velocityBC, c1o54); } - q = (subgridD.q[DIR_P0P])[k]; + q = (subgridD.q[DIR_P0P])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { VeloX = slipLength*vx1; @@ -1933,7 +1999,7 @@ __global__ void QSlipDeviceComp27TurbViscosity( (dist.f[DIR_M0M])[kbw] = 
getInterpolatedDistributionForVeloBC(q, f_TE, f_BW, feq, om_turb, velocityBC, c1o54); } - q = (subgridD.q[DIR_M0M])[k]; + q = (subgridD.q[DIR_M0M])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { VeloX = slipLength*vx1; @@ -1941,13 +2007,13 @@ __global__ void QSlipDeviceComp27TurbViscosity( if (x == true) VeloX = c0o1; if (z == true) VeloZ = c0o1; - velocityLB = -vx1 - vx3; - feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o54); - velocityBC = -VeloX - VeloZ; - (dist.f[DIR_P0P])[kte] = getInterpolatedDistributionForVeloBC(q, f_BW, f_TE, feq, om_turb, velocityBC, c1o54); + velocityLB = -vx1 - vx3; + feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o54); + velocityBC = -VeloX - VeloZ; + (dist.f[DIR_P0P])[kte] = getInterpolatedDistributionForVeloBC(q, f_BW, f_TE, feq, om_turb, velocityBC, c1o54); } - q = (subgridD.q[DIR_P0M])[k]; + q = (subgridD.q[DIR_P0M])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { VeloX = slipLength*vx1; @@ -1961,7 +2027,7 @@ __global__ void QSlipDeviceComp27TurbViscosity( (dist.f[DIR_M0P])[ktw] = getInterpolatedDistributionForVeloBC(q, f_BE, f_TW, feq, om_turb, velocityBC, c1o54); } - q = (subgridD.q[DIR_M0P])[k]; + q = (subgridD.q[DIR_M0P])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { VeloX = slipLength*vx1; @@ -1975,7 +2041,7 @@ __global__ void QSlipDeviceComp27TurbViscosity( (dist.f[DIR_P0M])[kbe] = getInterpolatedDistributionForVeloBC(q, f_TW, f_BE, feq, om_turb, velocityBC, c1o54); } - q = (subgridD.q[DIR_0PP])[k]; + q = (subgridD.q[DIR_0PP])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { VeloY = slipLength*vx2; @@ -1989,7 +2055,7 @@ __global__ void QSlipDeviceComp27TurbViscosity( (dist.f[DIR_0MM])[kbs] = getInterpolatedDistributionForVeloBC(q, f_TN, f_BS, feq, om_turb, velocityBC, c1o54); } - q = (subgridD.q[DIR_0MM])[k]; + q = (subgridD.q[DIR_0MM])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { VeloY = slipLength*vx2; @@ -2004,7 +2070,7 @@ __global__ void QSlipDeviceComp27TurbViscosity( } - q = (subgridD.q[DIR_0PM])[k]; + q = (subgridD.q[DIR_0PM])[nodeIndex]; if (q>=c0o1 
&& q<=c1o1) { VeloY = slipLength*vx2; @@ -2018,7 +2084,7 @@ __global__ void QSlipDeviceComp27TurbViscosity( (dist.f[DIR_0MP])[kts] = getInterpolatedDistributionForVeloBC(q, f_BN, f_TS, feq, om_turb, velocityBC, c1o54); } - q = (subgridD.q[DIR_0MP])[k]; + q = (subgridD.q[DIR_0MP])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { VeloY = slipLength*vx2; @@ -2032,7 +2098,7 @@ __global__ void QSlipDeviceComp27TurbViscosity( (dist.f[DIR_0PM])[kbn] = getInterpolatedDistributionForVeloBC(q, f_TS, f_BN, feq, om_turb, velocityBC, c1o54); } - q = (subgridD.q[DIR_PPP])[k]; + q = (subgridD.q[DIR_PPP])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { VeloX = slipLength*vx1; @@ -2047,7 +2113,7 @@ __global__ void QSlipDeviceComp27TurbViscosity( (dist.f[DIR_MMM])[kbsw] = getInterpolatedDistributionForVeloBC(q, f_TNE, f_BSW, feq, om_turb, velocityBC, c1o216); } - q = (subgridD.q[DIR_MMM])[k]; + q = (subgridD.q[DIR_MMM])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { VeloX = slipLength*vx1; @@ -2063,7 +2129,7 @@ __global__ void QSlipDeviceComp27TurbViscosity( } - q = (subgridD.q[DIR_PPM])[k]; + q = (subgridD.q[DIR_PPM])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { VeloX = slipLength*vx1; @@ -2078,7 +2144,7 @@ __global__ void QSlipDeviceComp27TurbViscosity( (dist.f[DIR_MMP])[ktsw] = getInterpolatedDistributionForVeloBC(q, f_BNE, f_TSW, feq, om_turb, velocityBC, c1o216); } - q = (subgridD.q[DIR_MMP])[k]; + q = (subgridD.q[DIR_MMP])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { VeloX = slipLength*vx1; @@ -2093,7 +2159,7 @@ __global__ void QSlipDeviceComp27TurbViscosity( (dist.f[DIR_PPM])[kbne] = getInterpolatedDistributionForVeloBC(q, f_TSW, f_BNE, feq, om_turb, velocityBC, c1o216); } - q = (subgridD.q[DIR_PMP])[k]; + q = (subgridD.q[DIR_PMP])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { VeloX = slipLength*vx1; @@ -2108,7 +2174,7 @@ __global__ void QSlipDeviceComp27TurbViscosity( (dist.f[DIR_MPM])[kbnw] = getInterpolatedDistributionForVeloBC(q, f_TSE, f_BNW, feq, om_turb, velocityBC, c1o216); } - q = (subgridD.q[DIR_MPM])[k]; + q = 
(subgridD.q[DIR_MPM])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { VeloX = slipLength*vx1; @@ -2123,7 +2189,7 @@ __global__ void QSlipDeviceComp27TurbViscosity( (dist.f[DIR_PMP])[ktse] = getInterpolatedDistributionForVeloBC(q, f_BNW, f_TSE, feq, om_turb, velocityBC, c1o216); } - q = (subgridD.q[DIR_PMM])[k]; + q = (subgridD.q[DIR_PMM])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { VeloX = slipLength*vx1; @@ -2138,7 +2204,7 @@ __global__ void QSlipDeviceComp27TurbViscosity( (dist.f[DIR_MPP])[ktnw] = getInterpolatedDistributionForVeloBC(q, f_BSE, f_TNW, feq, om_turb, velocityBC, c1o216); } - q = (subgridD.q[DIR_MPP])[k]; + q = (subgridD.q[DIR_MPP])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { VeloX = slipLength*vx1; @@ -2154,37 +2220,59 @@ __global__ void QSlipDeviceComp27TurbViscosity( } } } +//////////////////////////////////////////////////////////////////////////// + + + + + + + + + + + + + + + + + + + + + + + + + + + + //////////////////////////////////////////////////////////////////////////// __global__ void QSlipPressureDeviceComp27TurbViscosity( - real* distributions, - int* subgridDistanceIndices, - real* subgridDistances, - unsigned int numberOfBCnodes, - real omega, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - real* turbViscosity, - unsigned long long numberOfLBnodes, - bool isEvenTimestep) + real* distributions, + int* subgridDistanceIndices, + real* subgridDistances, + unsigned int numberOfBCnodes, + real omega, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + real* turbViscosity, + unsigned long long numberOfLBnodes, + bool isEvenTimestep) { //! The slip boundary condition is executed in the following steps //! //////////////////////////////////////////////////////////////////////////////// //! - Get node index coordinates from threadIdx, blockIdx, blockDim and gridDim. //! 
- const unsigned x = threadIdx.x; // global x-index - const unsigned y = blockIdx.x; // global y-index - const unsigned z = blockIdx.y; // global z-index - - const unsigned nx = blockDim.x; - const unsigned ny = gridDim.x; - - const unsigned k = nx*(ny*z + y) + x; + const unsigned nodeIndex = getNodeIndex(); - if(k < numberOfBCnodes) + if(nodeIndex < numberOfBCnodes) { ////////////////////////////////////////////////////////////////////////// //! - Read distributions: style of reading and writing the distributions from/to stored arrays dependent on timestep is based on the esoteric twist algorithm \ref @@ -2202,7 +2290,7 @@ __global__ void QSlipPressureDeviceComp27TurbViscosity( //////////////////////////////////////////////////////////////////////////////// //! - Set neighbor indices (necessary for indirect addressing) //! - unsigned int indexOfBCnode = subgridDistanceIndices[k]; + unsigned int indexOfBCnode = subgridDistanceIndices[nodeIndex]; unsigned int kzero= indexOfBCnode; unsigned int ke = indexOfBCnode; unsigned int kw = neighborX[indexOfBCnode]; @@ -2309,7 +2397,7 @@ __global__ void QSlipPressureDeviceComp27TurbViscosity( bool y = false; bool z = false; - q = (subgridD.q[DIR_P00])[k]; + q = (subgridD.q[DIR_P00])[nodeIndex]; if (q>=c0o1 && q<=c1o1) // only update distribution for q between zero and one { VeloX = c0o1; @@ -2321,7 +2409,7 @@ __global__ void QSlipPressureDeviceComp27TurbViscosity( (dist.f[DIR_M00])[kw] = getInterpolatedDistributionForVeloWithPressureBC(q, f_E, f_W, feq, om_turb, drho, velocityBC, c2o27); } - q = (subgridD.q[DIR_M00])[k]; + q = (subgridD.q[DIR_M00])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { VeloX = c0o1; @@ -2333,7 +2421,7 @@ __global__ void QSlipPressureDeviceComp27TurbViscosity( (dist.f[DIR_P00])[ke] = getInterpolatedDistributionForVeloWithPressureBC(q, f_W, f_E, feq, om_turb, drho, velocityBC, c2o27); } - q = (subgridD.q[DIR_0P0])[k]; + q = (subgridD.q[DIR_0P0])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { VeloY = c0o1; @@ -2345,7 
+2433,7 @@ __global__ void QSlipPressureDeviceComp27TurbViscosity( (dist.f[DIR_0M0])[ks] = getInterpolatedDistributionForVeloWithPressureBC(q, f_N, f_S, feq, om_turb, drho, velocityBC, c2o27); } - q = (subgridD.q[DIR_0M0])[k]; + q = (subgridD.q[DIR_0M0])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { VeloY = c0o1; @@ -2357,7 +2445,7 @@ __global__ void QSlipPressureDeviceComp27TurbViscosity( (dist.f[DIR_0P0])[kn] = getInterpolatedDistributionForVeloWithPressureBC(q, f_S, f_N, feq, om_turb, drho, velocityBC, c2o27); } - q = (subgridD.q[DIR_00P])[k]; + q = (subgridD.q[DIR_00P])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { VeloZ = c0o1; @@ -2369,7 +2457,7 @@ __global__ void QSlipPressureDeviceComp27TurbViscosity( (dist.f[DIR_00M])[kb] = getInterpolatedDistributionForVeloWithPressureBC(q, f_T, f_B, feq, om_turb, drho, velocityBC, c2o27); } - q = (subgridD.q[DIR_00M])[k]; + q = (subgridD.q[DIR_00M])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { VeloZ = c0o1; @@ -2381,7 +2469,7 @@ __global__ void QSlipPressureDeviceComp27TurbViscosity( (dist.f[DIR_00P])[kt] = getInterpolatedDistributionForVeloWithPressureBC(q, f_B, f_T, feq, om_turb, drho, velocityBC, c2o27); } - q = (subgridD.q[DIR_PP0])[k]; + q = (subgridD.q[DIR_PP0])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { VeloX = slipLength*vx1; @@ -2395,7 +2483,7 @@ __global__ void QSlipPressureDeviceComp27TurbViscosity( (dist.f[DIR_MM0])[ksw] = getInterpolatedDistributionForVeloWithPressureBC(q, f_NE, f_SW, feq, om_turb, drho, velocityBC, c1o54); } - q = (subgridD.q[DIR_MM0])[k]; + q = (subgridD.q[DIR_MM0])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { VeloX = slipLength*vx1; @@ -2409,7 +2497,7 @@ __global__ void QSlipPressureDeviceComp27TurbViscosity( (dist.f[DIR_PP0])[kne] = getInterpolatedDistributionForVeloWithPressureBC(q, f_SW, f_NE, feq, om_turb, drho, velocityBC, c1o54); } - q = (subgridD.q[DIR_PM0])[k]; + q = (subgridD.q[DIR_PM0])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { VeloX = slipLength*vx1; @@ -2423,7 +2511,7 @@ __global__ void 
QSlipPressureDeviceComp27TurbViscosity( (dist.f[DIR_MP0])[knw] = getInterpolatedDistributionForVeloWithPressureBC(q, f_SE, f_NW, feq, om_turb, drho, velocityBC, c1o54); } - q = (subgridD.q[DIR_MP0])[k]; + q = (subgridD.q[DIR_MP0])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { VeloX = slipLength*vx1; @@ -2437,7 +2525,7 @@ __global__ void QSlipPressureDeviceComp27TurbViscosity( (dist.f[DIR_PM0])[kse] = getInterpolatedDistributionForVeloWithPressureBC(q, f_NW, f_SE, feq, om_turb, drho, velocityBC, c1o54); } - q = (subgridD.q[DIR_P0P])[k]; + q = (subgridD.q[DIR_P0P])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { VeloX = slipLength*vx1; @@ -2451,7 +2539,7 @@ __global__ void QSlipPressureDeviceComp27TurbViscosity( (dist.f[DIR_M0M])[kbw] = getInterpolatedDistributionForVeloWithPressureBC(q, f_TE, f_BW, feq, om_turb, drho, velocityBC, c1o54); } - q = (subgridD.q[DIR_M0M])[k]; + q = (subgridD.q[DIR_M0M])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { VeloX = slipLength*vx1; @@ -2459,13 +2547,13 @@ __global__ void QSlipPressureDeviceComp27TurbViscosity( if (x == true) VeloX = c0o1; if (z == true) VeloZ = c0o1; - velocityLB = -vx1 - vx3; - feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o54); - velocityBC = -VeloX - VeloZ; - (dist.f[DIR_P0P])[kte] = getInterpolatedDistributionForVeloWithPressureBC(q, f_BW, f_TE, feq, om_turb, drho, velocityBC, c1o54); + velocityLB = -vx1 - vx3; + feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o54); + velocityBC = -VeloX - VeloZ; + (dist.f[DIR_P0P])[kte] = getInterpolatedDistributionForVeloWithPressureBC(q, f_BW, f_TE, feq, om_turb, drho, velocityBC, c1o54); } - q = (subgridD.q[DIR_P0M])[k]; + q = (subgridD.q[DIR_P0M])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { VeloX = slipLength*vx1; @@ -2479,7 +2567,7 @@ __global__ void QSlipPressureDeviceComp27TurbViscosity( (dist.f[DIR_M0P])[ktw] = getInterpolatedDistributionForVeloWithPressureBC(q, f_BE, f_TW, feq, om_turb, drho, velocityBC, c1o54); } - q = (subgridD.q[DIR_M0P])[k]; + q = (subgridD.q[DIR_M0P])[nodeIndex]; 
if (q>=c0o1 && q<=c1o1) { VeloX = slipLength*vx1; @@ -2493,7 +2581,7 @@ __global__ void QSlipPressureDeviceComp27TurbViscosity( (dist.f[DIR_P0M])[kbe] = getInterpolatedDistributionForVeloWithPressureBC(q, f_TW, f_BE, feq, om_turb, drho, velocityBC, c1o54); } - q = (subgridD.q[DIR_0PP])[k]; + q = (subgridD.q[DIR_0PP])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { VeloY = slipLength*vx2; @@ -2507,7 +2595,7 @@ __global__ void QSlipPressureDeviceComp27TurbViscosity( (dist.f[DIR_0MM])[kbs] = getInterpolatedDistributionForVeloWithPressureBC(q, f_TN, f_BS, feq, om_turb, drho, velocityBC, c1o54); } - q = (subgridD.q[DIR_0MM])[k]; + q = (subgridD.q[DIR_0MM])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { VeloY = slipLength*vx2; @@ -2522,7 +2610,7 @@ __global__ void QSlipPressureDeviceComp27TurbViscosity( } - q = (subgridD.q[DIR_0PM])[k]; + q = (subgridD.q[DIR_0PM])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { VeloY = slipLength*vx2; @@ -2536,7 +2624,7 @@ __global__ void QSlipPressureDeviceComp27TurbViscosity( (dist.f[DIR_0MP])[kts] = getInterpolatedDistributionForVeloWithPressureBC(q, f_BN, f_TS, feq, om_turb, drho, velocityBC, c1o54); } - q = (subgridD.q[DIR_0MP])[k]; + q = (subgridD.q[DIR_0MP])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { VeloY = slipLength*vx2; @@ -2550,7 +2638,7 @@ __global__ void QSlipPressureDeviceComp27TurbViscosity( (dist.f[DIR_0PM])[kbn] = getInterpolatedDistributionForVeloWithPressureBC(q, f_TS, f_BN, feq, om_turb, drho, velocityBC, c1o54); } - q = (subgridD.q[DIR_PPP])[k]; + q = (subgridD.q[DIR_PPP])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { VeloX = slipLength*vx1; @@ -2565,7 +2653,7 @@ __global__ void QSlipPressureDeviceComp27TurbViscosity( (dist.f[DIR_MMM])[kbsw] = getInterpolatedDistributionForVeloWithPressureBC(q, f_TNE, f_BSW, feq, om_turb, drho, velocityBC, c1o216); } - q = (subgridD.q[DIR_MMM])[k]; + q = (subgridD.q[DIR_MMM])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { VeloX = slipLength*vx1; @@ -2581,7 +2669,7 @@ __global__ void QSlipPressureDeviceComp27TurbViscosity( } - q = 
(subgridD.q[DIR_PPM])[k]; + q = (subgridD.q[DIR_PPM])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { VeloX = slipLength*vx1; @@ -2596,7 +2684,7 @@ __global__ void QSlipPressureDeviceComp27TurbViscosity( (dist.f[DIR_MMP])[ktsw] = getInterpolatedDistributionForVeloWithPressureBC(q, f_BNE, f_TSW, feq, om_turb, drho, velocityBC, c1o216); } - q = (subgridD.q[DIR_MMP])[k]; + q = (subgridD.q[DIR_MMP])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { VeloX = slipLength*vx1; @@ -2611,7 +2699,7 @@ __global__ void QSlipPressureDeviceComp27TurbViscosity( (dist.f[DIR_PPM])[kbne] = getInterpolatedDistributionForVeloWithPressureBC(q, f_TSW, f_BNE, feq, om_turb, drho, velocityBC, c1o216); } - q = (subgridD.q[DIR_PMP])[k]; + q = (subgridD.q[DIR_PMP])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { VeloX = slipLength*vx1; @@ -2626,7 +2714,7 @@ __global__ void QSlipPressureDeviceComp27TurbViscosity( (dist.f[DIR_MPM])[kbnw] = getInterpolatedDistributionForVeloWithPressureBC(q, f_TSE, f_BNW, feq, om_turb, drho, velocityBC, c1o216); } - q = (subgridD.q[DIR_MPM])[k]; + q = (subgridD.q[DIR_MPM])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { VeloX = slipLength*vx1; @@ -2641,7 +2729,7 @@ __global__ void QSlipPressureDeviceComp27TurbViscosity( (dist.f[DIR_PMP])[ktse] = getInterpolatedDistributionForVeloWithPressureBC(q, f_BNW, f_TSE, feq, om_turb, drho, velocityBC, c1o216); } - q = (subgridD.q[DIR_PMM])[k]; + q = (subgridD.q[DIR_PMM])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { VeloX = slipLength*vx1; @@ -2656,7 +2744,7 @@ __global__ void QSlipPressureDeviceComp27TurbViscosity( (dist.f[DIR_MPP])[ktnw] = getInterpolatedDistributionForVeloWithPressureBC(q, f_BSE, f_TNW, feq, om_turb, drho, velocityBC, c1o216); } - q = (subgridD.q[DIR_MPP])[k]; + q = (subgridD.q[DIR_MPP])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { VeloX = slipLength*vx1; @@ -3378,19 +3466,20 @@ __global__ void QSlipPressureDeviceComp27TurbViscosity( ////////////////////////////////////////////////////////////////////////////// -__global__ void QSlipGeomDeviceComp27(real* DD, 
- int* k_Q, - real* QQ, - unsigned int numberOfBCnodes, - real om1, - real* NormalX, - real* NormalY, - real* NormalZ, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - unsigned long long numberOfLBnodes, - bool isEvenTimestep) +__global__ void QSlipGeomDeviceComp27( + real* DD, + int* k_Q, + real* QQ, + unsigned int numberOfBCnodes, + real om1, + real* NormalX, + real* NormalY, + real* NormalZ, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + unsigned long long numberOfLBnodes, + bool isEvenTimestep) { Distributions27 D; if (isEvenTimestep==true) @@ -4264,19 +4353,20 @@ __global__ void QSlipGeomDeviceComp27(real* DD, ////////////////////////////////////////////////////////////////////////////// -__global__ void QSlipNormDeviceComp27(real* DD, - int* k_Q, - real* QQ, - unsigned int numberOfBCnodes, - real om1, - real* NormalX, - real* NormalY, - real* NormalZ, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - unsigned long long numberOfLBnodes, - bool isEvenTimestep) +__global__ void QSlipNormDeviceComp27( + real* DD, + int* k_Q, + real* QQ, + unsigned int numberOfBCnodes, + real om1, + real* NormalX, + real* NormalY, + real* NormalZ, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + unsigned long long numberOfLBnodes, + bool isEvenTimestep) { Distributions27 D; if (isEvenTimestep==true) diff --git a/src/gpu/VirtualFluids_GPU/GPU/StressBCs27.cu b/src/gpu/VirtualFluids_GPU/GPU/StressBCs27.cu index 5563f93bf0e28afc48ddfe7d8d3c7c21fb0623ab..3208299e93940dabe52faa7d0b3c684c45596660 100644 --- a/src/gpu/VirtualFluids_GPU/GPU/StressBCs27.cu +++ b/src/gpu/VirtualFluids_GPU/GPU/StressBCs27.cu @@ -43,28 +43,30 @@ #include "LBM/LB.h" #include "lbm/constants/D3Q27.h" #include <lbm/constants/NumericConstants.h> -#include "KernelUtilities.h" +#include "LBM/GPUHelperFunctions/KernelUtilities.h" using namespace vf::lbm::constant; using namespace 
vf::lbm::dir; +using namespace vf::gpu; ////////////////////////////////////////////////////////////////////////////// -__host__ __device__ __forceinline__ void iMEM(uint k, uint kN, - real* _wallNormalX, real* _wallNormalY, real* _wallNormalZ, - real* vx, real* vy, real* vz, - real* vx_el, real* vy_el, real* vz_el, //!>mean (temporally filtered) velocities at exchange location - real* vx_w_mean, real* vy_w_mean, real* vz_w_mean, //!>mean (temporally filtered) velocities at wall-adjactent node - real vx_w_inst, real vy_w_inst, real vz_w_inst, //!>instantaneous velocities at wall-adjactent node - real rho, - int* samplingOffset, - real q, - real forceFactor, //!>e.g., 1.0 for simple-bounce back, or (1+q) for interpolated single-node bounce-back as in Geier et al (2015) - real eps, //!>filter constant in temporal averaging - real* z0, //!>aerodynamic roughness length - bool hasWallModelMonitor, - real* u_star_monitor, - real wallMomentumX, real wallMomentumY, real wallMomentumZ, - real& wallVelocityX, real& wallVelocityY, real&wallVelocityZ) +__host__ __device__ __forceinline__ void iMEM( + uint k, uint kN, + real* _wallNormalX, real* _wallNormalY, real* _wallNormalZ, + real* vx, real* vy, real* vz, + real* vx_el, real* vy_el, real* vz_el, //!>mean (temporally filtered) velocities at exchange location + real* vx_w_mean, real* vy_w_mean, real* vz_w_mean, //!>mean (temporally filtered) velocities at wall-adjactent node + real vx_w_inst, real vy_w_inst, real vz_w_inst, //!>instantaneous velocities at wall-adjactent node + real rho, + int* samplingOffset, + real q, + real forceFactor, //!>e.g., 1.0 for simple-bounce back, or (1+q) for interpolated single-node bounce-back as in Geier et al (2015) + real eps, //!>filter constant in temporal averaging + real* z0, //!>aerodynamic roughness length + bool hasWallModelMonitor, + real* u_star_monitor, + real wallMomentumX, real wallMomentumY, real wallMomentumZ, + real& wallVelocityX, real& wallVelocityY, real&wallVelocityZ) { 
real wallNormalX = _wallNormalX[k]; real wallNormalY = _wallNormalY[k]; @@ -136,37 +138,38 @@ __host__ __device__ __forceinline__ void iMEM(uint k, uint kN, } ////////////////////////////////////////////////////////////////////////////// -__global__ void QStressDeviceComp27(real* DD, - int* k_Q, - int* k_N, - real* QQ, - unsigned int numberOfBCnodes, - real om1, - real* turbViscosity, - real* vx, - real* vy, - real* vz, - real* normalX, - real* normalY, - real* normalZ, - real* vx_el, - real* vy_el, - real* vz_el, - real* vx_w_mean, - real* vy_w_mean, - real* vz_w_mean, - int* samplingOffset, - real* z0, - bool hasWallModelMonitor, - real* u_star_monitor, - real* Fx_monitor, - real* Fy_monitor, - real* Fz_monitor, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - unsigned long long numberOfLBnodes, - bool isEvenTimestep) +__global__ void QStressDeviceComp27( + real* DD, + int* k_Q, + int* k_N, + real* QQ, + unsigned int numberOfBCnodes, + real om1, + real* turbViscosity, + real* vx, + real* vy, + real* vz, + real* normalX, + real* normalY, + real* normalZ, + real* vx_el, + real* vy_el, + real* vz_el, + real* vx_w_mean, + real* vy_w_mean, + real* vz_w_mean, + int* samplingOffset, + real* z0, + bool hasWallModelMonitor, + real* u_star_monitor, + real* Fx_monitor, + real* Fy_monitor, + real* Fz_monitor, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + unsigned long long numberOfLBnodes, + bool isEvenTimestep) { Distributions27 D; diff --git a/src/gpu/VirtualFluids_GPU/GPU/TurbulenceIntensity.cu b/src/gpu/VirtualFluids_GPU/GPU/TurbulenceIntensity.cu index da209cd468db3b72ffc058fbe1ec4d76ca7960e5..3f440454ef272b13c24fe2a2882d67d32d32a841 100644 --- a/src/gpu/VirtualFluids_GPU/GPU/TurbulenceIntensity.cu +++ b/src/gpu/VirtualFluids_GPU/GPU/TurbulenceIntensity.cu @@ -9,14 +9,16 @@ /* Device code */ #include "LBM/LB.h" #include "lbm/constants/D3Q27.h" -#include <lbm/constants/NumericConstants.h> +#include 
"lbm/constants/NumericConstants.h" #include "lbm/MacroscopicQuantities.h" #include "../Kernel/Utilities/DistributionHelper.cuh" +#include "LBM/GPUHelperFunctions/KernelUtilities.h" using namespace vf::lbm::constant; using namespace vf::lbm::dir; +using namespace vf::gpu; ////////////////////////////////////////////////////////////////////////////// __global__ void CalcTurbulenceIntensity( @@ -37,16 +39,18 @@ __global__ void CalcTurbulenceIntensity( unsigned long long numberOfLBnodes, bool isEvenTimestep) { - const unsigned k = vf::gpu::getNodeIndex(); + //////////////////////////////////////////////////////////////////////////////// + //! - Get node index coordinates from threadIdx, blockIdx, blockDim and gridDim. + //! + const unsigned nodeIndex = getNodeIndex(); - if (k >= numberOfLBnodes) + if (nodeIndex >= numberOfLBnodes) return; - if (!vf::gpu::isValidFluidNode(typeOfGridNode[k])) + if (!isValidFluidNode(typeOfGridNode[nodeIndex])) return; - vf::gpu::DistributionWrapper distr_wrapper(distributions, numberOfLBnodes, isEvenTimestep, k, neighborX, neighborY, - neighborZ); + DistributionWrapper distr_wrapper(distributions, numberOfLBnodes, isEvenTimestep, nodeIndex, neighborX, neighborY, neighborZ); const auto &distribution = distr_wrapper.distribution; // analogue to LBCalcMacCompSP27 @@ -58,16 +62,16 @@ __global__ void CalcTurbulenceIntensity( // compute subtotals: // fluctuations - vxx[k] = vxx[k] + vx * vx; - vyy[k] = vyy[k] + vy * vy; - vzz[k] = vzz[k] + vz * vz; - vxy[k] = vxy[k] + vx * vy; - vxz[k] = vxz[k] + vx * vz; - vyz[k] = vyz[k] + vy * vz; + vxx[nodeIndex] = vxx[nodeIndex] + vx * vx; + vyy[nodeIndex] = vyy[nodeIndex] + vy * vy; + vzz[nodeIndex] = vzz[nodeIndex] + vz * vz; + vxy[nodeIndex] = vxy[nodeIndex] + vx * vy; + vxz[nodeIndex] = vxz[nodeIndex] + vx * vz; + vyz[nodeIndex] = vyz[nodeIndex] + vy * vz; // velocity (for mean velocity) - vx_mean[k] = vx_mean[k] + vx; - vy_mean[k] = vy_mean[k] + vy; - vz_mean[k] = vz_mean[k] + vz; + 
vx_mean[nodeIndex] = vx_mean[nodeIndex] + vx; + vy_mean[nodeIndex] = vy_mean[nodeIndex] + vy; + vz_mean[nodeIndex] = vz_mean[nodeIndex] + vz; } //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/src/gpu/VirtualFluids_GPU/GPU/TurbulentViscosityKernels.cu b/src/gpu/VirtualFluids_GPU/GPU/TurbulentViscosityKernels.cu index 20f6e83350ba5bde0c84b4498281e4a04e4d957f..7147629c448b8b730e4ae8c4eff8a0a400863de9 100644 --- a/src/gpu/VirtualFluids_GPU/GPU/TurbulentViscosityKernels.cu +++ b/src/gpu/VirtualFluids_GPU/GPU/TurbulentViscosityKernels.cu @@ -38,7 +38,7 @@ #include <cuda_runtime.h> #include <helper_cuda.h> #include "LBM/LB.h" -#include "Kernel/Utilities/DistributionHelper.cuh" +#include "LBM/GPUHelperFunctions/KernelUtilities.h" using namespace vf::lbm::constant; @@ -53,26 +53,31 @@ __host__ __device__ __forceinline__ void calcDerivatives(const uint& k, uint& kM dvz = ((fluidP ? vz[kP] : vz[k])-(fluidM ? vz[kM] : vz[k]))*div; } -__global__ void calcAMD(real* vx, - real* vy, - real* vz, - real* turbulentViscosity, - uint* neighborX, - uint* neighborY, - uint* neighborZ, - uint* neighborWSB, - uint* typeOfGridNode, - unsigned long long numberOfLBnodes, - real SGSConstant) +__global__ void calcAMD( + real* vx, + real* vy, + real* vz, + real* turbulentViscosity, + uint* neighborX, + uint* neighborY, + uint* neighborZ, + uint* neighborWSB, + uint* typeOfGridNode, + unsigned long long numberOfLBnodes, + real SGSConstant) { - const uint k = vf::gpu::getNodeIndex(); - if(k >= numberOfLBnodes) return; - if(typeOfGridNode[k] != GEO_FLUID) return; + //////////////////////////////////////////////////////////////////////////////// + //! - Get node index coordinates from threadIdx, blockIdx, blockDim and gridDim. + //! 
+ const unsigned nodeIndex = vf::gpu::getNodeIndex(); - uint kPx = neighborX[k]; - uint kPy = neighborY[k]; - uint kPz = neighborZ[k]; - uint kMxyz = neighborWSB[k]; + if(nodeIndex >= numberOfLBnodes) return; + if(typeOfGridNode[nodeIndex] != GEO_FLUID) return; + + uint kPx = neighborX[nodeIndex]; + uint kPy = neighborY[nodeIndex]; + uint kPz = neighborZ[nodeIndex]; + uint kMxyz = neighborWSB[nodeIndex]; uint kMx = neighborZ[neighborY[kMxyz]]; uint kMy = neighborZ[neighborX[kMxyz]]; uint kMz = neighborY[neighborX[kMxyz]]; @@ -81,9 +86,9 @@ __global__ void calcAMD(real* vx, dvydx, dvydy, dvydz, dvzdx, dvzdy, dvzdz; - calcDerivatives(k, kMx, kPx, typeOfGridNode, vx, vy, vz, dvxdx, dvydx, dvzdx); - calcDerivatives(k, kMy, kPy, typeOfGridNode, vx, vy, vz, dvxdy, dvydy, dvzdy); - calcDerivatives(k, kMz, kPz, typeOfGridNode, vx, vy, vz, dvxdz, dvydz, dvzdz); + calcDerivatives(nodeIndex, kMx, kPx, typeOfGridNode, vx, vy, vz, dvxdx, dvydx, dvzdx); + calcDerivatives(nodeIndex, kMy, kPy, typeOfGridNode, vx, vy, vz, dvxdy, dvydy, dvzdy); + calcDerivatives(nodeIndex, kMz, kPz, typeOfGridNode, vx, vy, vz, dvxdz, dvydz, dvzdz); real denominator = dvxdx*dvxdx + dvydx*dvydx + dvzdx*dvzdx + dvxdy*dvxdy + dvydy*dvydy + dvzdy*dvzdy + @@ -95,7 +100,7 @@ __global__ void calcAMD(real* vx, (dvxdx*dvzdx + dvxdy*dvzdy + dvxdz*dvzdz) * (dvxdz+dvzdx) + (dvydx*dvzdx + dvydy*dvzdy + dvydz*dvzdz) * (dvydz+dvzdy); - turbulentViscosity[k] = denominator != c0o1 ? max(c0o1,-SGSConstant*enumerator)/denominator : c0o1; + turbulentViscosity[nodeIndex] = denominator != c0o1 ? 
max(c0o1,-SGSConstant*enumerator)/denominator : c0o1; } void calcTurbulentViscosityAMD(Parameter* para, int level) diff --git a/src/gpu/VirtualFluids_GPU/GPU/VelocityBCs27.cu b/src/gpu/VirtualFluids_GPU/GPU/VelocityBCs27.cu index 12ff3af8ea9a1f57c64d560b63404920f2d4a8ff..ccf9d1771ec0e1895e5cb79fae63675429b02c73 100644 --- a/src/gpu/VirtualFluids_GPU/GPU/VelocityBCs27.cu +++ b/src/gpu/VirtualFluids_GPU/GPU/VelocityBCs27.cu @@ -1,35 +1,59 @@ -// _ ___ __ __________ _ __ ______________ __ -// | | / (_)____/ /___ ______ _/ / ____/ /_ __(_)___/ /____ / ___/ __ / / / / -// | | / / / ___/ __/ / / / __ `/ / /_ / / / / / / __ / ___/ / /___/ /_/ / / / / -// | |/ / / / / /_/ /_/ / /_/ / / __/ / / /_/ / / /_/ (__ ) / /_) / ____/ /__/ / -// |___/_/_/ \__/\__,_/\__,_/_/_/ /_/\__,_/_/\__,_/____/ \____/_/ \_____/ +//======================================================================================= +// ____ ____ __ ______ __________ __ __ __ __ +// \ \ | | | | | _ \ |___ ___| | | | | / \ | | +// \ \ | | | | | |_) | | | | | | | / \ | | +// \ \ | | | | | _ / | | | | | | / /\ \ | | +// \ \ | | | | | | \ \ | | | \__/ | / ____ \ | |____ +// \ \ | | |__| |__| \__\ |__| \________/ /__/ \__\ |_______| +// \ \ | | ________________________________________________________________ +// \ \ | | | ______________________________________________________________| +// \ \| | | | __ __ __ __ ______ _______ +// \ | | |_____ | | | | | | | | | _ \ / _____) +// \ | | _____| | | | | | | | | | | \ \ \_______ +// \ | | | | |_____ | \_/ | | | | |_/ / _____ | +// \ _____| |__| |________| \_______/ |__| |______/ (_______/ // -////////////////////////////////////////////////////////////////////////// - -/* Device code */ +// This file is part of VirtualFluids. 
VirtualFluids is free software: you can +// redistribute it and/or modify it under the terms of the GNU General Public +// License as published by the Free Software Foundation, either version 3 of +// the License, or (at your option) any later version. +// +// VirtualFluids is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// for more details. +// +// You should have received a copy of the GNU General Public License along +// with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>. +// +//! \file VelocityBCs27.cu +//! \ingroup GPU +//! \author Martin Schoenherr, Anna Wellmann +//====================================================================================== #include "LBM/LB.h" #include "lbm/constants/D3Q27.h" #include "lbm/constants/NumericConstants.h" -#include "KernelUtilities.h" +#include "LBM/GPUHelperFunctions/KernelUtilities.h" using namespace vf::lbm::constant; using namespace vf::lbm::dir; +using namespace vf::gpu; ////////////////////////////////////////////////////////////////////////////// __global__ void QVelDeviceCompPlusSlip27( - real* vx, - real* vy, - real* vz, - real* DD, - int* k_Q, - real* QQ, - unsigned int numberOfBCnodes, - real om1, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - unsigned long long numberOfLBnodes, - bool isEvenTimestep) + real* vx, + real* vy, + real* vz, + real* DD, + int* k_Q, + real* QQ, + unsigned int numberOfBCnodes, + real om1, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + unsigned long long numberOfLBnodes, + bool isEvenTimestep) { Distributions27 D; if (isEvenTimestep==true) @@ -553,18 +577,19 @@ __global__ void QVelDeviceCompPlusSlip27( 
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -__global__ void QVeloDeviceEQ27(real* VeloX, - real* VeloY, - real* VeloZ, - real* DD, - int* k_Q, - int numberOfBCnodes, - real om1, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - unsigned long long numberOfLBnodes, - bool isEvenTimestep) +__global__ void QVeloDeviceEQ27( + real* VeloX, + real* VeloY, + real* VeloZ, + real* DD, + int* k_Q, + int numberOfBCnodes, + real om1, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + unsigned long long numberOfLBnodes, + bool isEvenTimestep) { //////////////////////////////////////////////////////////////////////////////// const unsigned x = threadIdx.x; // Globaler x-Index @@ -834,18 +859,18 @@ __global__ void QVeloDeviceEQ27(real* VeloX, //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// __global__ void QVeloStreetDeviceEQ27( - real* veloXfraction, - real* veloYfraction, - int* naschVelo, - real* DD, - int* naschIndex, - int numberOfStreetNodes, - real velocityRatio, - uint* neighborX, - uint* neighborY, - uint* neighborZ, - unsigned long long numberOfLBnodes, - bool isEvenTimestep) + real* veloXfraction, + real* veloYfraction, + int* naschVelo, + real* DD, + int* naschIndex, + int numberOfStreetNodes, + real velocityRatio, + uint* neighborX, + uint* neighborY, + uint* neighborZ, + unsigned long long numberOfLBnodes, + bool isEvenTimestep) { //////////////////////////////////////////////////////////////////////////////// const unsigned x = threadIdx.x; // Globaler x-Index @@ -1120,19 +1145,19 @@ __global__ void QVeloStreetDeviceEQ27( ////////////////////////////////////////////////////////////////////////////// __global__ void QVelDeviceIncompHighNu27( - real* vx, - real* vy, - real* vz, - real* DD, - int* k_Q, 
- real* QQ, - unsigned int numberOfBCnodes, - real om1, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - unsigned long long numberOfLBnodes, - bool isEvenTimestep) + real* vx, + real* vy, + real* vz, + real* DD, + int* k_Q, + real* QQ, + unsigned int numberOfBCnodes, + real om1, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + unsigned long long numberOfLBnodes, + bool isEvenTimestep) { Distributions27 D; if (isEvenTimestep==true) @@ -1618,19 +1643,19 @@ __global__ void QVelDeviceIncompHighNu27( ////////////////////////////////////////////////////////////////////////////// __global__ void QVelDeviceCompHighNu27( - real* vx, - real* vy, - real* vz, - real* DD, - int* k_Q, - real* QQ, - unsigned int numberOfBCnodes, - real om1, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - unsigned long long numberOfLBnodes, - bool isEvenTimestep) + real* vx, + real* vy, + real* vz, + real* DD, + int* k_Q, + real* QQ, + unsigned int numberOfBCnodes, + real om1, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + unsigned long long numberOfLBnodes, + bool isEvenTimestep) { Distributions27 D; if (isEvenTimestep==true) @@ -2194,39 +2219,32 @@ __global__ void QVelDeviceCompHighNu27( ////////////////////////////////////////////////////////////////////////////// __global__ void QVelDeviceCompZeroPress27( - real* velocityX, - real* velocityY, - real* velocityZ, - real* distribution, - int* subgridDistanceIndices, - real* subgridDistances, - unsigned int numberOfBCnodes, - real omega, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - unsigned long long numberOfLBnodes, - bool isEvenTimestep) + real* velocityX, + real* velocityY, + real* velocityZ, + real* distribution, + int* subgridDistanceIndices, + real* subgridDistances, + unsigned int numberOfBCnodes, + real omega, + unsigned int* neighborX, + unsigned int* neighborY, + 
unsigned int* neighborZ, + unsigned long long numberOfLBnodes, + bool isEvenTimestep) { ////////////////////////////////////////////////////////////////////////// - //! The velocity boundary condition is executed in the following steps - //! - //////////////////////////////////////////////////////////////////////////////// - //! - Get node index coordinates from threadIdx, blockIdx, blockDim and gridDim. - //! - const unsigned x = threadIdx.x; // global x-index - const unsigned y = blockIdx.x; // global y-index - const unsigned z = blockIdx.y; // global z-index - - const unsigned nx = blockDim.x; - const unsigned ny = gridDim.x; - - const unsigned k = nx*(ny*z + y) + x; + //! The velocity boundary condition is executed in the following steps + //! + //////////////////////////////////////////////////////////////////////////////// + //! - Get node index coordinates from threadIdx, blockIdx, blockDim and gridDim. + //! + const unsigned nodeIndex = getNodeIndex(); ////////////////////////////////////////////////////////////////////////// //! - Run for all indices in size of boundary condition (numberOfBCnodes) //! - if(k < numberOfBCnodes) + if(nodeIndex < numberOfBCnodes) { ////////////////////////////////////////////////////////////////////////// @@ -2239,9 +2257,9 @@ __global__ void QVelDeviceCompZeroPress27( //////////////////////////////////////////////////////////////////////////////// //! - Set local velocities //! - real VeloX = velocityX[k]; - real VeloY = velocityY[k]; - real VeloZ = velocityZ[k]; + real VeloX = velocityX[nodeIndex]; + real VeloY = velocityY[nodeIndex]; + real VeloZ = velocityZ[nodeIndex]; //////////////////////////////////////////////////////////////////////////////// @@ -2253,7 +2271,7 @@ __global__ void QVelDeviceCompZeroPress27( //////////////////////////////////////////////////////////////////////////////// //! - Set neighbor indices (necessary for indirect addressing) //! 
- unsigned int KQK = subgridDistanceIndices[k]; + unsigned int KQK = subgridDistanceIndices[nodeIndex]; unsigned int kzero= KQK; unsigned int ke = KQK; unsigned int kw = neighborX[KQK]; @@ -2342,7 +2360,7 @@ __global__ void QVelDeviceCompZeroPress27( //////////////////////////////////////////////////////////////////////////////// //! - Update distributions with subgrid distance (q) between zero and one real feq, q, velocityLB, velocityBC; - q = (subgridD.q[DIR_P00])[k]; + q = (subgridD.q[DIR_P00])[nodeIndex]; if (q>=c0o1 && q<=c1o1) // only update distribution for q between zero and one { velocityLB = vx1; @@ -2351,7 +2369,7 @@ __global__ void QVelDeviceCompZeroPress27( (dist.f[DIR_M00])[kw] = getInterpolatedDistributionForVeloWithPressureBC(q, f_E, f_W, feq, omega, drho, velocityBC, c2o27); } - q = (subgridD.q[DIR_M00])[k]; + q = (subgridD.q[DIR_M00])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { velocityLB = -vx1; @@ -2360,7 +2378,7 @@ __global__ void QVelDeviceCompZeroPress27( (dist.f[DIR_P00])[ke] = getInterpolatedDistributionForVeloWithPressureBC(q, f_W, f_E, feq, omega, drho, velocityBC, c2o27); } - q = (subgridD.q[DIR_0P0])[k]; + q = (subgridD.q[DIR_0P0])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { velocityLB = vx2; @@ -2369,7 +2387,7 @@ __global__ void QVelDeviceCompZeroPress27( (dist.f[DIR_0M0])[ks] = getInterpolatedDistributionForVeloWithPressureBC(q, f_N, f_S, feq, omega, drho, velocityBC, c2o27); } - q = (subgridD.q[DIR_0M0])[k]; + q = (subgridD.q[DIR_0M0])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { velocityLB = -vx2; @@ -2378,7 +2396,7 @@ __global__ void QVelDeviceCompZeroPress27( (dist.f[DIR_0P0])[kn] = getInterpolatedDistributionForVeloWithPressureBC(q, f_S, f_N, feq, omega, drho, velocityBC, c2o27); } - q = (subgridD.q[DIR_00P])[k]; + q = (subgridD.q[DIR_00P])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { velocityLB = vx3; @@ -2387,7 +2405,7 @@ __global__ void QVelDeviceCompZeroPress27( (dist.f[DIR_00M])[kb] = getInterpolatedDistributionForVeloWithPressureBC(q, f_T, f_B, feq, 
omega, drho, velocityBC, c2o27); } - q = (subgridD.q[DIR_00M])[k]; + q = (subgridD.q[DIR_00M])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { velocityLB = -vx3; @@ -2396,7 +2414,7 @@ __global__ void QVelDeviceCompZeroPress27( (dist.f[DIR_00P])[kt] = getInterpolatedDistributionForVeloWithPressureBC(q, f_B, f_T, feq, omega, drho, velocityBC, c2o27); } - q = (subgridD.q[DIR_PP0])[k]; + q = (subgridD.q[DIR_PP0])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { velocityLB = vx1 + vx2; @@ -2405,7 +2423,7 @@ __global__ void QVelDeviceCompZeroPress27( (dist.f[DIR_MM0])[ksw] = getInterpolatedDistributionForVeloWithPressureBC(q, f_NE, f_SW, feq, omega, drho, velocityBC, c1o54); } - q = (subgridD.q[DIR_MM0])[k]; + q = (subgridD.q[DIR_MM0])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { velocityLB = -vx1 - vx2; @@ -2414,7 +2432,7 @@ __global__ void QVelDeviceCompZeroPress27( (dist.f[DIR_PP0])[kne] = getInterpolatedDistributionForVeloWithPressureBC(q, f_SW, f_NE, feq, omega, drho, velocityBC, c1o54); } - q = (subgridD.q[DIR_PM0])[k]; + q = (subgridD.q[DIR_PM0])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { velocityLB = vx1 - vx2; @@ -2423,7 +2441,7 @@ __global__ void QVelDeviceCompZeroPress27( (dist.f[DIR_MP0])[knw] = getInterpolatedDistributionForVeloWithPressureBC(q, f_SE, f_NW, feq, omega, drho, velocityBC, c1o54); } - q = (subgridD.q[DIR_MP0])[k]; + q = (subgridD.q[DIR_MP0])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { velocityLB = -vx1 + vx2; @@ -2432,7 +2450,7 @@ __global__ void QVelDeviceCompZeroPress27( (dist.f[DIR_PM0])[kse] = getInterpolatedDistributionForVeloWithPressureBC(q, f_NW, f_SE, feq, omega, drho, velocityBC, c1o54); } - q = (subgridD.q[DIR_P0P])[k]; + q = (subgridD.q[DIR_P0P])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { velocityLB = vx1 + vx3; @@ -2441,7 +2459,7 @@ __global__ void QVelDeviceCompZeroPress27( (dist.f[DIR_M0M])[kbw] = getInterpolatedDistributionForVeloWithPressureBC(q, f_TE, f_BW, feq, omega, drho, velocityBC, c1o54); } - q = (subgridD.q[DIR_M0M])[k]; + q = (subgridD.q[DIR_M0M])[nodeIndex]; if 
(q>=c0o1 && q<=c1o1) { velocityLB = -vx1 - vx3; @@ -2450,7 +2468,7 @@ __global__ void QVelDeviceCompZeroPress27( (dist.f[DIR_P0P])[kte] = getInterpolatedDistributionForVeloWithPressureBC(q, f_BW, f_TE, feq, omega, drho, velocityBC, c1o54); } - q = (subgridD.q[DIR_P0M])[k]; + q = (subgridD.q[DIR_P0M])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { velocityLB = vx1 - vx3; @@ -2459,7 +2477,7 @@ __global__ void QVelDeviceCompZeroPress27( (dist.f[DIR_M0P])[ktw] = getInterpolatedDistributionForVeloWithPressureBC(q, f_BE, f_TW, feq, omega, drho, velocityBC, c1o54); } - q = (subgridD.q[DIR_M0P])[k]; + q = (subgridD.q[DIR_M0P])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { velocityLB = -vx1 + vx3; @@ -2468,7 +2486,7 @@ __global__ void QVelDeviceCompZeroPress27( (dist.f[DIR_P0M])[kbe] = getInterpolatedDistributionForVeloWithPressureBC(q, f_TW, f_BE, feq, omega, drho, velocityBC, c1o54); } - q = (subgridD.q[DIR_0PP])[k]; + q = (subgridD.q[DIR_0PP])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { velocityLB = vx2 + vx3; @@ -2477,7 +2495,7 @@ __global__ void QVelDeviceCompZeroPress27( (dist.f[DIR_0MM])[kbs] = getInterpolatedDistributionForVeloWithPressureBC(q, f_TN, f_BS, feq, omega, drho, velocityBC, c1o54); } - q = (subgridD.q[DIR_0MM])[k]; + q = (subgridD.q[DIR_0MM])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { velocityLB = -vx2 - vx3; @@ -2486,7 +2504,7 @@ __global__ void QVelDeviceCompZeroPress27( (dist.f[DIR_0PP])[ktn] = getInterpolatedDistributionForVeloWithPressureBC(q, f_BS, f_TN, feq, omega, drho, velocityBC, c1o54); } - q = (subgridD.q[DIR_0PM])[k]; + q = (subgridD.q[DIR_0PM])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { velocityLB = vx2 - vx3; @@ -2495,7 +2513,7 @@ __global__ void QVelDeviceCompZeroPress27( (dist.f[DIR_0MP])[kts] = getInterpolatedDistributionForVeloWithPressureBC(q, f_BN, f_TS, feq, omega, drho, velocityBC, c1o54); } - q = (subgridD.q[DIR_0MP])[k]; + q = (subgridD.q[DIR_0MP])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { velocityLB = -vx2 + vx3; @@ -2504,7 +2522,7 @@ __global__ void 
QVelDeviceCompZeroPress27( (dist.f[DIR_0PM])[kbn] = getInterpolatedDistributionForVeloWithPressureBC(q, f_TS, f_BN, feq, omega, drho, velocityBC, c1o54); } - q = (subgridD.q[DIR_PPP])[k]; + q = (subgridD.q[DIR_PPP])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { velocityLB = vx1 + vx2 + vx3; @@ -2513,7 +2531,7 @@ __global__ void QVelDeviceCompZeroPress27( (dist.f[DIR_MMM])[kbsw] = getInterpolatedDistributionForVeloWithPressureBC(q, f_TNE, f_BSW, feq, omega, drho, velocityBC, c1o216); } - q = (subgridD.q[DIR_MMM])[k]; + q = (subgridD.q[DIR_MMM])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { velocityLB = -vx1 - vx2 - vx3; @@ -2522,7 +2540,7 @@ __global__ void QVelDeviceCompZeroPress27( (dist.f[DIR_PPP])[ktne] = getInterpolatedDistributionForVeloWithPressureBC(q, f_BSW, f_TNE, feq, omega, drho, velocityBC, c1o216); } - q = (subgridD.q[DIR_PPM])[k]; + q = (subgridD.q[DIR_PPM])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { velocityLB = vx1 + vx2 - vx3; @@ -2531,7 +2549,7 @@ __global__ void QVelDeviceCompZeroPress27( (dist.f[DIR_MMP])[ktsw] = getInterpolatedDistributionForVeloWithPressureBC(q, f_BNE, f_TSW, feq, omega, drho, velocityBC, c1o216); } - q = (subgridD.q[DIR_MMP])[k]; + q = (subgridD.q[DIR_MMP])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { velocityLB = -vx1 - vx2 + vx3; @@ -2540,7 +2558,7 @@ __global__ void QVelDeviceCompZeroPress27( (dist.f[DIR_PPM])[kbne] = getInterpolatedDistributionForVeloWithPressureBC(q, f_TSW, f_BNE, feq, omega, drho, velocityBC, c1o216); } - q = (subgridD.q[DIR_PMP])[k]; + q = (subgridD.q[DIR_PMP])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { velocityLB = vx1 - vx2 + vx3; @@ -2549,7 +2567,7 @@ __global__ void QVelDeviceCompZeroPress27( (dist.f[DIR_MPM])[kbnw] = getInterpolatedDistributionForVeloWithPressureBC(q, f_TSE, f_BNW, feq, omega, drho, velocityBC, c1o216); } - q = (subgridD.q[DIR_MPM])[k]; + q = (subgridD.q[DIR_MPM])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { velocityLB = -vx1 + vx2 - vx3; @@ -2558,7 +2576,7 @@ __global__ void QVelDeviceCompZeroPress27( 
(dist.f[DIR_PMP])[ktse] = getInterpolatedDistributionForVeloWithPressureBC(q, f_BNW, f_TSE, feq, omega, drho, velocityBC, c1o216); } - q = (subgridD.q[DIR_PMM])[k]; + q = (subgridD.q[DIR_PMM])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { velocityLB = vx1 - vx2 - vx3; @@ -2567,7 +2585,7 @@ __global__ void QVelDeviceCompZeroPress27( (dist.f[DIR_MPP])[ktnw] = getInterpolatedDistributionForVeloWithPressureBC(q, f_BSE, f_TNW, feq, omega, drho, velocityBC, c1o216); } - q = (subgridD.q[DIR_MPP])[k]; + q = (subgridD.q[DIR_MPP])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { velocityLB = -vx1 + vx2 + vx3; @@ -2619,26 +2637,27 @@ __global__ void QVelDeviceCompZeroPress27( ////////////////////////////////////////////////////////////////////////////// -__global__ void QVelDeviceCompZeroPress1h27( int inx, - int iny, - real* vx, - real* vy, - real* vz, - real* DD, - int* k_Q, - real* QQ, - unsigned int numberOfBCnodes, - real om1, - real Phi, - real angularVelocity, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - real* coordX, - real* coordY, - real* coordZ, - unsigned long long numberOfLBnodes, - bool isEvenTimestep) +__global__ void QVelDeviceCompZeroPress1h27( + int inx, + int iny, + real* vx, + real* vy, + real* vz, + real* DD, + int* k_Q, + real* QQ, + unsigned int numberOfBCnodes, + real om1, + real Phi, + real angularVelocity, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + real* coordX, + real* coordY, + real* coordZ, + unsigned long long numberOfLBnodes, + bool isEvenTimestep) { Distributions27 D; if (isEvenTimestep==true) @@ -3090,21 +3109,22 @@ __global__ void QVelDeviceCompZeroPress1h27( int inx, ////////////////////////////////////////////////////////////////////////////// -__global__ void LB_BC_Vel_West_27( int nx, - int ny, - int nz, - int itz, - unsigned int* bcMatD, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - real* DD, - unsigned long long numberOfLBnodes, - bool 
isEvenTimestep, - real u0x, - unsigned int grid_nx, - unsigned int grid_ny, - real om) +__global__ void LB_BC_Vel_West_27( + int nx, + int ny, + int nz, + int itz, + unsigned int* bcMatD, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + real* DD, + unsigned long long numberOfLBnodes, + bool isEvenTimestep, + real u0x, + unsigned int grid_nx, + unsigned int grid_ny, + real om) { //thread-index unsigned int ity = blockIdx.x; @@ -3414,18 +3434,18 @@ __global__ void LB_BC_Vel_West_27( int nx, ////////////////////////////////////////////////////////////////////////////// __global__ void QVelDevPlainBB27( - real* velocityX, - real* velocityY, - real* velocityZ, - real* distributions, - int* subgridDistanceIndices, - real* subgridDistances, - uint numberOfBCnodes, - uint* neighborX, - uint* neighborY, - uint* neighborZ, - unsigned long long numberOfLBnodes, - bool isEvenTimestep) + real* velocityX, + real* velocityY, + real* velocityZ, + real* distributions, + int* subgridDistanceIndices, + real* subgridDistances, + uint numberOfBCnodes, + uint* neighborX, + uint* neighborY, + uint* neighborZ, + unsigned long long numberOfLBnodes, + bool isEvenTimestep) { ////////////////////////////////////////////////////////////////////////// //! The velocity boundary condition is executed in the following steps @@ -3433,18 +3453,11 @@ __global__ void QVelDevPlainBB27( //////////////////////////////////////////////////////////////////////////////// //! - Get node index coordinates from threadIdx, blockIdx, blockDim and gridDim. //! 
- const unsigned x = threadIdx.x; // global x-index - const unsigned y = blockIdx.x; // global y-index - const unsigned z = blockIdx.y; // global z-index - - const unsigned nx = blockDim.x; - const unsigned ny = gridDim.x; - - const unsigned k = nx*(ny*z + y) + x; + const unsigned nodeIndex = getNodeIndex(); ////////////////////////////////////////////////////////////////////////// // run for all indices in size of boundary condition (numberOfBCnodes) - if(k < numberOfBCnodes) + if(nodeIndex < numberOfBCnodes) { ////////////////////////////////////////////////////////////////////////// //! - Read distributions: style of reading and writing the distributions from/to stored arrays dependent on timestep is based on the esoteric twist algorithm \ref @@ -3456,9 +3469,9 @@ __global__ void QVelDevPlainBB27( //////////////////////////////////////////////////////////////////////////////// //! - Set local velocities //! - real VeloX = velocityX[k]; - real VeloY = velocityY[k]; - real VeloZ = velocityZ[k]; + real VeloX = velocityX[nodeIndex]; + real VeloY = velocityY[nodeIndex]; + real VeloZ = velocityZ[nodeIndex]; //////////////////////////////////////////////////////////////////////////////// //! - Set local subgrid distances (q's) @@ -3469,7 +3482,7 @@ __global__ void QVelDevPlainBB27( //////////////////////////////////////////////////////////////////////////////// //! - Set neighbor indices (necessary for indirect addressing) //! - uint indexOfBCnode = subgridDistanceIndices[k]; + uint indexOfBCnode = subgridDistanceIndices[nodeIndex]; uint ke = indexOfBCnode; uint kw = neighborX[indexOfBCnode]; uint kn = indexOfBCnode; @@ -3535,32 +3548,32 @@ __global__ void QVelDevPlainBB27( //////////////////////////////////////////////////////////////////////////////// //! 
- rewrite distributions if there is a sub-grid distance (q) in same direction real q; - q = (subgridD.q[DIR_P00])[k]; if (q>=c0o1 && q<=c1o1) (dist.f[DIR_M00])[kw ]=f_E + c4o9 * (-VeloX); - q = (subgridD.q[DIR_M00])[k]; if (q>=c0o1 && q<=c1o1) (dist.f[DIR_P00])[ke ]=f_W + c4o9 * ( VeloX); - q = (subgridD.q[DIR_0P0])[k]; if (q>=c0o1 && q<=c1o1) (dist.f[DIR_0M0])[ks ]=f_N + c4o9 * (-VeloY); - q = (subgridD.q[DIR_0M0])[k]; if (q>=c0o1 && q<=c1o1) (dist.f[DIR_0P0])[kn ]=f_S + c4o9 * ( VeloY); - q = (subgridD.q[DIR_00P])[k]; if (q>=c0o1 && q<=c1o1) (dist.f[DIR_00M])[kb ]=f_T + c4o9 * (-VeloZ); - q = (subgridD.q[DIR_00M])[k]; if (q>=c0o1 && q<=c1o1) (dist.f[DIR_00P])[kt ]=f_B + c4o9 * ( VeloZ); - q = (subgridD.q[DIR_PP0])[k]; if (q>=c0o1 && q<=c1o1) (dist.f[DIR_MM0])[ksw ]=f_NE + c1o9 * (-VeloX - VeloY); - q = (subgridD.q[DIR_MM0])[k]; if (q>=c0o1 && q<=c1o1) (dist.f[DIR_PP0])[kne ]=f_SW + c1o9 * ( VeloX + VeloY); - q = (subgridD.q[DIR_PM0])[k]; if (q>=c0o1 && q<=c1o1) (dist.f[DIR_MP0])[knw ]=f_SE + c1o9 * (-VeloX + VeloY); - q = (subgridD.q[DIR_MP0])[k]; if (q>=c0o1 && q<=c1o1) (dist.f[DIR_PM0])[kse ]=f_NW + c1o9 * ( VeloX - VeloY); - q = (subgridD.q[DIR_P0P])[k]; if (q>=c0o1 && q<=c1o1) (dist.f[DIR_M0M])[kbw ]=f_TE + c1o9 * (-VeloX - VeloZ); - q = (subgridD.q[DIR_M0M])[k]; if (q>=c0o1 && q<=c1o1) (dist.f[DIR_P0P])[kte ]=f_BW + c1o9 * ( VeloX + VeloZ); - q = (subgridD.q[DIR_P0M])[k]; if (q>=c0o1 && q<=c1o1) (dist.f[DIR_M0P])[ktw ]=f_BE + c1o9 * (-VeloX + VeloZ); - q = (subgridD.q[DIR_M0P])[k]; if (q>=c0o1 && q<=c1o1) (dist.f[DIR_P0M])[kbe ]=f_TW + c1o9 * ( VeloX - VeloZ); - q = (subgridD.q[DIR_0PP])[k]; if (q>=c0o1 && q<=c1o1) (dist.f[DIR_0MM])[kbs ]=f_TN + c1o9 * (-VeloY - VeloZ); - q = (subgridD.q[DIR_0MM])[k]; if (q>=c0o1 && q<=c1o1) (dist.f[DIR_0PP])[ktn ]=f_BS + c1o9 * ( VeloY + VeloZ); - q = (subgridD.q[DIR_0PM])[k]; if (q>=c0o1 && q<=c1o1) (dist.f[DIR_0MP])[kts ]=f_BN + c1o9 * (-VeloY + VeloZ); - q = (subgridD.q[DIR_0MP])[k]; if (q>=c0o1 && q<=c1o1) 
(dist.f[DIR_0PM])[kbn ]=f_TS + c1o9 * ( VeloY - VeloZ); - q = (subgridD.q[DIR_PPP])[k]; if (q>=c0o1 && q<=c1o1) (dist.f[DIR_MMM])[kbsw]=f_TNE + c1o36 * (-VeloX - VeloY - VeloZ); - q = (subgridD.q[DIR_MMM])[k]; if (q>=c0o1 && q<=c1o1) (dist.f[DIR_PPP])[ktne]=f_BSW + c1o36 * ( VeloX + VeloY + VeloZ); - q = (subgridD.q[DIR_PPM])[k]; if (q>=c0o1 && q<=c1o1) (dist.f[DIR_MMP])[ktsw]=f_BNE + c1o36 * (-VeloX - VeloY + VeloZ); - q = (subgridD.q[DIR_MMP])[k]; if (q>=c0o1 && q<=c1o1) (dist.f[DIR_PPM])[kbne]=f_TSW + c1o36 * ( VeloX + VeloY - VeloZ); - q = (subgridD.q[DIR_PMP])[k]; if (q>=c0o1 && q<=c1o1) (dist.f[DIR_MPM])[kbnw]=f_TSE + c1o36 * (-VeloX + VeloY - VeloZ); - q = (subgridD.q[DIR_MPM])[k]; if (q>=c0o1 && q<=c1o1) (dist.f[DIR_PMP])[ktse]=f_BNW + c1o36 * ( VeloX - VeloY + VeloZ); - q = (subgridD.q[DIR_PMM])[k]; if (q>=c0o1 && q<=c1o1) (dist.f[DIR_MPP])[ktnw]=f_BSE + c1o36 * (-VeloX + VeloY + VeloZ); - q = (subgridD.q[DIR_MPP])[k]; if (q>=c0o1 && q<=c1o1) (dist.f[DIR_PMM])[kbse]=f_TNW + c1o36 * ( VeloX - VeloY - VeloZ); + q = (subgridD.q[DIR_P00])[nodeIndex]; if (q>=c0o1 && q<=c1o1) (dist.f[DIR_M00])[kw ]=f_E + c4o9 * (-VeloX); + q = (subgridD.q[DIR_M00])[nodeIndex]; if (q>=c0o1 && q<=c1o1) (dist.f[DIR_P00])[ke ]=f_W + c4o9 * ( VeloX); + q = (subgridD.q[DIR_0P0])[nodeIndex]; if (q>=c0o1 && q<=c1o1) (dist.f[DIR_0M0])[ks ]=f_N + c4o9 * (-VeloY); + q = (subgridD.q[DIR_0M0])[nodeIndex]; if (q>=c0o1 && q<=c1o1) (dist.f[DIR_0P0])[kn ]=f_S + c4o9 * ( VeloY); + q = (subgridD.q[DIR_00P])[nodeIndex]; if (q>=c0o1 && q<=c1o1) (dist.f[DIR_00M])[kb ]=f_T + c4o9 * (-VeloZ); + q = (subgridD.q[DIR_00M])[nodeIndex]; if (q>=c0o1 && q<=c1o1) (dist.f[DIR_00P])[kt ]=f_B + c4o9 * ( VeloZ); + q = (subgridD.q[DIR_PP0])[nodeIndex]; if (q>=c0o1 && q<=c1o1) (dist.f[DIR_MM0])[ksw ]=f_NE + c1o9 * (-VeloX - VeloY); + q = (subgridD.q[DIR_MM0])[nodeIndex]; if (q>=c0o1 && q<=c1o1) (dist.f[DIR_PP0])[kne ]=f_SW + c1o9 * ( VeloX + VeloY); + q = (subgridD.q[DIR_PM0])[nodeIndex]; if (q>=c0o1 && q<=c1o1) 
(dist.f[DIR_MP0])[knw ]=f_SE + c1o9 * (-VeloX + VeloY); + q = (subgridD.q[DIR_MP0])[nodeIndex]; if (q>=c0o1 && q<=c1o1) (dist.f[DIR_PM0])[kse ]=f_NW + c1o9 * ( VeloX - VeloY); + q = (subgridD.q[DIR_P0P])[nodeIndex]; if (q>=c0o1 && q<=c1o1) (dist.f[DIR_M0M])[kbw ]=f_TE + c1o9 * (-VeloX - VeloZ); + q = (subgridD.q[DIR_M0M])[nodeIndex]; if (q>=c0o1 && q<=c1o1) (dist.f[DIR_P0P])[kte ]=f_BW + c1o9 * ( VeloX + VeloZ); + q = (subgridD.q[DIR_P0M])[nodeIndex]; if (q>=c0o1 && q<=c1o1) (dist.f[DIR_M0P])[ktw ]=f_BE + c1o9 * (-VeloX + VeloZ); + q = (subgridD.q[DIR_M0P])[nodeIndex]; if (q>=c0o1 && q<=c1o1) (dist.f[DIR_P0M])[kbe ]=f_TW + c1o9 * ( VeloX - VeloZ); + q = (subgridD.q[DIR_0PP])[nodeIndex]; if (q>=c0o1 && q<=c1o1) (dist.f[DIR_0MM])[kbs ]=f_TN + c1o9 * (-VeloY - VeloZ); + q = (subgridD.q[DIR_0MM])[nodeIndex]; if (q>=c0o1 && q<=c1o1) (dist.f[DIR_0PP])[ktn ]=f_BS + c1o9 * ( VeloY + VeloZ); + q = (subgridD.q[DIR_0PM])[nodeIndex]; if (q>=c0o1 && q<=c1o1) (dist.f[DIR_0MP])[kts ]=f_BN + c1o9 * (-VeloY + VeloZ); + q = (subgridD.q[DIR_0MP])[nodeIndex]; if (q>=c0o1 && q<=c1o1) (dist.f[DIR_0PM])[kbn ]=f_TS + c1o9 * ( VeloY - VeloZ); + q = (subgridD.q[DIR_PPP])[nodeIndex]; if (q>=c0o1 && q<=c1o1) (dist.f[DIR_MMM])[kbsw]=f_TNE + c1o36 * (-VeloX - VeloY - VeloZ); + q = (subgridD.q[DIR_MMM])[nodeIndex]; if (q>=c0o1 && q<=c1o1) (dist.f[DIR_PPP])[ktne]=f_BSW + c1o36 * ( VeloX + VeloY + VeloZ); + q = (subgridD.q[DIR_PPM])[nodeIndex]; if (q>=c0o1 && q<=c1o1) (dist.f[DIR_MMP])[ktsw]=f_BNE + c1o36 * (-VeloX - VeloY + VeloZ); + q = (subgridD.q[DIR_MMP])[nodeIndex]; if (q>=c0o1 && q<=c1o1) (dist.f[DIR_PPM])[kbne]=f_TSW + c1o36 * ( VeloX + VeloY - VeloZ); + q = (subgridD.q[DIR_PMP])[nodeIndex]; if (q>=c0o1 && q<=c1o1) (dist.f[DIR_MPM])[kbnw]=f_TSE + c1o36 * (-VeloX + VeloY - VeloZ); + q = (subgridD.q[DIR_MPM])[nodeIndex]; if (q>=c0o1 && q<=c1o1) (dist.f[DIR_PMP])[ktse]=f_BNW + c1o36 * ( VeloX - VeloY + VeloZ); + q = (subgridD.q[DIR_PMM])[nodeIndex]; if (q>=c0o1 && q<=c1o1) 
(dist.f[DIR_MPP])[ktnw]=f_BSE + c1o36 * (-VeloX + VeloY + VeloZ); + q = (subgridD.q[DIR_MPP])[nodeIndex]; if (q>=c0o1 && q<=c1o1) (dist.f[DIR_PMM])[kbse]=f_TNW + c1o36 * ( VeloX - VeloY - VeloZ); } } //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// @@ -3604,19 +3617,20 @@ __global__ void QVelDevPlainBB27( ////////////////////////////////////////////////////////////////////////////// -__global__ void QVelDevCouette27(real* vx, - real* vy, - real* vz, - real* DD, - int* k_Q, - real* QQ, - unsigned int numberOfBCnodes, - real om1, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - unsigned long long numberOfLBnodes, - bool isEvenTimestep) +__global__ void QVelDevCouette27( + real* vx, + real* vy, + real* vz, + real* DD, + int* k_Q, + real* QQ, + unsigned int numberOfBCnodes, + real om1, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + unsigned long long numberOfLBnodes, + bool isEvenTimestep) { Distributions27 D; if (isEvenTimestep==true) @@ -3964,26 +3978,27 @@ __global__ void QVelDevCouette27(real* vx, ////////////////////////////////////////////////////////////////////////////// -__global__ void QVelDev1h27( int inx, - int iny, - real* vx, - real* vy, - real* vz, - real* DD, - int* k_Q, - real* QQ, - unsigned int numberOfBCnodes, - real om1, - real Phi, - real angularVelocity, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - real* coordX, - real* coordY, - real* coordZ, - unsigned long long numberOfLBnodes, - bool isEvenTimestep) +__global__ void QVelDev1h27( + int inx, + int iny, + real* vx, + real* vy, + real* vz, + real* DD, + int* k_Q, + real* QQ, + unsigned int numberOfBCnodes, + real om1, + real Phi, + real angularVelocity, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + real* coordX, + real* coordY, + real* coordZ, + unsigned 
long long numberOfLBnodes, + bool isEvenTimestep) { Distributions27 D; if (isEvenTimestep==true) @@ -4748,39 +4763,32 @@ __global__ void QVelDev1h27( int inx, ////////////////////////////////////////////////////////////////////////////// __global__ void QVelDeviceComp27( - real* velocityX, - real* velocityY, - real* velocityZ, - real* distributions, - int* subgridDistanceIndices, - real* subgridDistances, - unsigned int numberOfBCnodes, - real omega, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - unsigned long long numberOfLBnodes, - bool isEvenTimestep) + real* velocityX, + real* velocityY, + real* velocityZ, + real* distributions, + int* subgridDistanceIndices, + real* subgridDistances, + unsigned int numberOfBCnodes, + real omega, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + unsigned long long numberOfLBnodes, + bool isEvenTimestep) { ////////////////////////////////////////////////////////////////////////// //! The velocity boundary condition is executed in the following steps //! - //////////////////////////////////////////////////////////////////////////////// - //! - Get node index coordinates from threadIdx, blockIdx, blockDim and gridDim. - //! - const unsigned x = threadIdx.x; // global x-index - const unsigned y = blockIdx.x; // global y-index - const unsigned z = blockIdx.y; // global z-index - - const unsigned nx = blockDim.x; - const unsigned ny = gridDim.x; - - const unsigned k = nx*(ny*z + y) + x; + //////////////////////////////////////////////////////////////////////////////// + //! - Get node index coordinates from threadIdx, blockIdx, blockDim and gridDim. + //! + const unsigned nodeIndex = getNodeIndex(); ////////////////////////////////////////////////////////////////////////// //! - Run for all indices in size of boundary condition (numberOfBCnodes) //! 
- if(k < numberOfBCnodes) + if(nodeIndex < numberOfBCnodes) { ////////////////////////////////////////////////////////////////////////// //! - Read distributions: style of reading and writing the distributions from/to stored arrays dependent on timestep is based on the esoteric twist algorithm \ref @@ -4792,9 +4800,9 @@ __global__ void QVelDeviceComp27( //////////////////////////////////////////////////////////////////////////////// //! - Set local velocities //! - real VeloX = velocityX[k]; - real VeloY = velocityY[k]; - real VeloZ = velocityZ[k]; + real VeloX = velocityX[nodeIndex]; + real VeloY = velocityY[nodeIndex]; + real VeloZ = velocityZ[nodeIndex]; //////////////////////////////////////////////////////////////////////////////// //! - Set local subgrid distances (q's) @@ -4805,7 +4813,7 @@ __global__ void QVelDeviceComp27( //////////////////////////////////////////////////////////////////////////////// //! - Set neighbor indices (necessary for indirect addressing) //! - unsigned int indexOfBCnode = subgridDistanceIndices[k]; + unsigned int indexOfBCnode = subgridDistanceIndices[nodeIndex]; unsigned int kzero= indexOfBCnode; unsigned int ke = indexOfBCnode; unsigned int kw = neighborX[indexOfBCnode]; @@ -4894,7 +4902,7 @@ __global__ void QVelDeviceComp27( //! - Update distributions with subgrid distance (q) between zero and one //! 
real feq, q, velocityLB, velocityBC; - q = (subgridD.q[DIR_P00])[k]; + q = (subgridD.q[DIR_P00])[nodeIndex]; if (q>=c0o1 && q<=c1o1) // only update distribution for q between zero and one { velocityLB = vx1; @@ -4903,7 +4911,7 @@ __global__ void QVelDeviceComp27( (dist.f[DIR_M00])[kw] = getInterpolatedDistributionForVeloBC(q, f_E, f_W, feq, omega, velocityBC, c2o27); } - q = (subgridD.q[DIR_M00])[k]; + q = (subgridD.q[DIR_M00])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { velocityLB = -vx1; @@ -4912,7 +4920,7 @@ __global__ void QVelDeviceComp27( (dist.f[DIR_P00])[ke] = getInterpolatedDistributionForVeloBC(q, f_W, f_E, feq, omega, velocityBC, c2o27); } - q = (subgridD.q[DIR_0P0])[k]; + q = (subgridD.q[DIR_0P0])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { velocityLB = vx2; @@ -4921,7 +4929,7 @@ __global__ void QVelDeviceComp27( (dist.f[DIR_0M0])[ks] = getInterpolatedDistributionForVeloBC(q, f_N, f_S, feq, omega, velocityBC, c2o27); } - q = (subgridD.q[DIR_0M0])[k]; + q = (subgridD.q[DIR_0M0])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { velocityLB = -vx2; @@ -4930,7 +4938,7 @@ __global__ void QVelDeviceComp27( (dist.f[DIR_0P0])[kn] = getInterpolatedDistributionForVeloBC(q, f_S, f_N, feq, omega, velocityBC, c2o27); } - q = (subgridD.q[DIR_00P])[k]; + q = (subgridD.q[DIR_00P])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { velocityLB = vx3; @@ -4939,7 +4947,7 @@ __global__ void QVelDeviceComp27( (dist.f[DIR_00M])[kb] = getInterpolatedDistributionForVeloBC(q, f_T, f_B, feq, omega, velocityBC, c2o27); } - q = (subgridD.q[DIR_00M])[k]; + q = (subgridD.q[DIR_00M])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { velocityLB = -vx3; @@ -4948,7 +4956,7 @@ __global__ void QVelDeviceComp27( (dist.f[DIR_00P])[kt] = getInterpolatedDistributionForVeloBC(q, f_B, f_T, feq, omega, velocityBC, c2o27); } - q = (subgridD.q[DIR_PP0])[k]; + q = (subgridD.q[DIR_PP0])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { velocityLB = vx1 + vx2; @@ -4957,7 +4965,7 @@ __global__ void QVelDeviceComp27( (dist.f[DIR_MM0])[ksw] = 
getInterpolatedDistributionForVeloBC(q, f_NE, f_SW, feq, omega, velocityBC, c1o54); } - q = (subgridD.q[DIR_MM0])[k]; + q = (subgridD.q[DIR_MM0])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { velocityLB = -vx1 - vx2; @@ -4966,7 +4974,7 @@ __global__ void QVelDeviceComp27( (dist.f[DIR_PP0])[kne] = getInterpolatedDistributionForVeloBC(q, f_SW, f_NE, feq, omega, velocityBC, c1o54); } - q = (subgridD.q[DIR_PM0])[k]; + q = (subgridD.q[DIR_PM0])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { velocityLB = vx1 - vx2; @@ -4975,7 +4983,7 @@ __global__ void QVelDeviceComp27( (dist.f[DIR_MP0])[knw] = getInterpolatedDistributionForVeloBC(q, f_SE, f_NW, feq, omega, velocityBC, c1o54); } - q = (subgridD.q[DIR_MP0])[k]; + q = (subgridD.q[DIR_MP0])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { velocityLB = -vx1 + vx2; @@ -4984,7 +4992,7 @@ __global__ void QVelDeviceComp27( (dist.f[DIR_PM0])[kse] = getInterpolatedDistributionForVeloBC(q, f_NW, f_SE, feq, omega, velocityBC, c1o54); } - q = (subgridD.q[DIR_P0P])[k]; + q = (subgridD.q[DIR_P0P])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { velocityLB = vx1 + vx3; @@ -4993,7 +5001,7 @@ __global__ void QVelDeviceComp27( (dist.f[DIR_M0M])[kbw] = getInterpolatedDistributionForVeloBC(q, f_TE, f_BW, feq, omega, velocityBC, c1o54); } - q = (subgridD.q[DIR_M0M])[k]; + q = (subgridD.q[DIR_M0M])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { velocityLB = -vx1 - vx3; @@ -5002,7 +5010,7 @@ __global__ void QVelDeviceComp27( (dist.f[DIR_P0P])[kte] = getInterpolatedDistributionForVeloBC(q, f_BW, f_TE, feq, omega, velocityBC, c1o54); } - q = (subgridD.q[DIR_P0M])[k]; + q = (subgridD.q[DIR_P0M])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { velocityLB = vx1 - vx3; @@ -5011,7 +5019,7 @@ __global__ void QVelDeviceComp27( (dist.f[DIR_M0P])[ktw] = getInterpolatedDistributionForVeloBC(q, f_BE, f_TW, feq, omega, velocityBC, c1o54); } - q = (subgridD.q[DIR_M0P])[k]; + q = (subgridD.q[DIR_M0P])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { velocityLB = -vx1 + vx3; @@ -5020,7 +5028,7 @@ __global__ void 
QVelDeviceComp27( (dist.f[DIR_P0M])[kbe] = getInterpolatedDistributionForVeloBC(q, f_TW, f_BE, feq, omega, velocityBC, c1o54); } - q = (subgridD.q[DIR_0PP])[k]; + q = (subgridD.q[DIR_0PP])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { velocityLB = vx2 + vx3; @@ -5029,7 +5037,7 @@ __global__ void QVelDeviceComp27( (dist.f[DIR_0MM])[kbs] = getInterpolatedDistributionForVeloBC(q, f_TN, f_BS, feq, omega, velocityBC, c1o54); } - q = (subgridD.q[DIR_0MM])[k]; + q = (subgridD.q[DIR_0MM])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { velocityLB = -vx2 - vx3; @@ -5038,7 +5046,7 @@ __global__ void QVelDeviceComp27( (dist.f[DIR_0PP])[ktn] = getInterpolatedDistributionForVeloBC(q, f_BS, f_TN, feq, omega, velocityBC, c1o54); } - q = (subgridD.q[DIR_0PM])[k]; + q = (subgridD.q[DIR_0PM])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { velocityLB = vx2 - vx3; @@ -5047,7 +5055,7 @@ __global__ void QVelDeviceComp27( (dist.f[DIR_0MP])[kts] = getInterpolatedDistributionForVeloBC(q, f_BN, f_TS, feq, omega, velocityBC, c1o54); } - q = (subgridD.q[DIR_0MP])[k]; + q = (subgridD.q[DIR_0MP])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { velocityLB = -vx2 + vx3; @@ -5056,7 +5064,7 @@ __global__ void QVelDeviceComp27( (dist.f[DIR_0PM])[kbn] = getInterpolatedDistributionForVeloBC(q, f_TS, f_BN, feq, omega, velocityBC, c1o54); } - q = (subgridD.q[DIR_PPP])[k]; + q = (subgridD.q[DIR_PPP])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { velocityLB = vx1 + vx2 + vx3; @@ -5065,7 +5073,7 @@ __global__ void QVelDeviceComp27( (dist.f[DIR_MMM])[kbsw] = getInterpolatedDistributionForVeloBC(q, f_TNE, f_BSW, feq, omega, velocityBC, c1o216); } - q = (subgridD.q[DIR_MMM])[k]; + q = (subgridD.q[DIR_MMM])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { velocityLB = -vx1 - vx2 - vx3; @@ -5074,7 +5082,7 @@ __global__ void QVelDeviceComp27( (dist.f[DIR_PPP])[ktne] = getInterpolatedDistributionForVeloBC(q, f_BSW, f_TNE, feq, omega, velocityBC, c1o216); } - q = (subgridD.q[DIR_PPM])[k]; + q = (subgridD.q[DIR_PPM])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { velocityLB = 
vx1 + vx2 - vx3; @@ -5083,7 +5091,7 @@ __global__ void QVelDeviceComp27( (dist.f[DIR_MMP])[ktsw] = getInterpolatedDistributionForVeloBC(q, f_BNE, f_TSW, feq, omega, velocityBC, c1o216); } - q = (subgridD.q[DIR_MMP])[k]; + q = (subgridD.q[DIR_MMP])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { velocityLB = -vx1 - vx2 + vx3; @@ -5092,7 +5100,7 @@ __global__ void QVelDeviceComp27( (dist.f[DIR_PPM])[kbne] = getInterpolatedDistributionForVeloBC(q, f_TSW, f_BNE, feq, omega, velocityBC, c1o216); } - q = (subgridD.q[DIR_PMP])[k]; + q = (subgridD.q[DIR_PMP])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { velocityLB = vx1 - vx2 + vx3; @@ -5101,7 +5109,7 @@ __global__ void QVelDeviceComp27( (dist.f[DIR_MPM])[kbnw] = getInterpolatedDistributionForVeloBC(q, f_TSE, f_BNW, feq, omega, velocityBC, c1o216); } - q = (subgridD.q[DIR_MPM])[k]; + q = (subgridD.q[DIR_MPM])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { velocityLB = -vx1 + vx2 - vx3; @@ -5110,7 +5118,7 @@ __global__ void QVelDeviceComp27( (dist.f[DIR_PMP])[ktse] = getInterpolatedDistributionForVeloBC(q, f_BNW, f_TSE, feq, omega, velocityBC, c1o216); } - q = (subgridD.q[DIR_PMM])[k]; + q = (subgridD.q[DIR_PMM])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { velocityLB = vx1 - vx2 - vx3; @@ -5119,7 +5127,7 @@ __global__ void QVelDeviceComp27( (dist.f[DIR_MPP])[ktnw] = getInterpolatedDistributionForVeloBC(q, f_BSE, f_TNW, feq, omega, velocityBC, c1o216); } - q = (subgridD.q[DIR_MPP])[k]; + q = (subgridD.q[DIR_MPP])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { velocityLB = -vx1 + vx2 + vx3; @@ -5170,21 +5178,22 @@ __global__ void QVelDeviceComp27( ////////////////////////////////////////////////////////////////////////////// -__global__ void QVelDevice27(int inx, - int iny, - real* vx, - real* vy, - real* vz, - real* DD, - int* k_Q, - real* QQ, - unsigned int numberOfBCnodes, - real om1, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - unsigned long long numberOfLBnodes, - bool isEvenTimestep) +__global__ void QVelDevice27( + int 
inx, + int iny, + real* vx, + real* vy, + real* vz, + real* DD, + int* k_Q, + real* QQ, + unsigned int numberOfBCnodes, + real om1, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + unsigned long long numberOfLBnodes, + bool isEvenTimestep) { Distributions27 D; if (isEvenTimestep==true) @@ -5723,19 +5732,20 @@ __global__ void QVelDevice27(int inx, //////////////////////////////////////////////////////////////////////////////// -__global__ void PropellerBC(unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - real* rho, - real* ux, - real* uy, - real* uz, - int* k_Q, - unsigned int size_Prop, - unsigned long long numberOfLBnodes, - unsigned int* bcMatD, - real* DD, - bool EvenOrOdd) +__global__ void PropellerBC( + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + real* rho, + real* ux, + real* uy, + real* uz, + int* k_Q, + unsigned int size_Prop, + unsigned long long numberOfLBnodes, + unsigned int* bcMatD, + real* DD, + bool EvenOrOdd) { //////////////////////////////////////////////////////////////////////////////// const unsigned x = threadIdx.x; // Globaler x-Index diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17/CumulantK17_Device.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17/CumulantK17_Device.cu index 5598f0fc229ac446c8d5393ca9e6ff70577bae1a..1ffec96c255b7923f3ee39c01f756abd8cad8862 100644 --- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17/CumulantK17_Device.cu +++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17/CumulantK17_Device.cu @@ -38,21 +38,20 @@ //! required options are switched on ( \param writeMacroscopicVariables and/or \param applyBodyForce) in order to minimize memory accesses. The default //! refers to the plain cumlant kernel (CollisionTemplate::Default). //! 
Nodes are added to subsets (taggedFluidNodes) in Simulation::init using a corresponding tag with different values of CollisionTemplate. These subsets -//! are provided by the utilized PostCollisionInteractiors depending on they specifc requirements (e.g. writeMacroscopicVariables for probes). +//! are provided by the utilized PostCollisionInteractiors depending on their specific requirements (e.g. writeMacroscopicVariables for probes). //======================================================================================= -/* Device code */ #include "LBM/LB.h" #include "lbm/constants/D3Q27.h" -#include <lbm/constants/NumericConstants.h> -#include "Kernel/Utilities/DistributionHelper.cuh" +#include "lbm/constants/NumericConstants.h" +#include "LBM/GPUHelperFunctions/KernelUtilities.h" +#include "LBM/GPUHelperFunctions/ChimeraTransformation.h" #include "GPU/TurbulentViscosityInlines.cuh" using namespace vf::lbm::constant; using namespace vf::lbm::dir; -#include "Kernel/Utilities/ChimeraTransformation.h" - +using namespace vf::gpu; //////////////////////////////////////////////////////////////////////////////// template<TurbulenceModel turbulenceModel, bool writeMacroscopicVariables, bool applyBodyForce> @@ -90,16 +89,16 @@ __global__ void LB_Kernel_CumulantK17( //////////////////////////////////////////////////////////////////////////////// //! - Get node index coordinates from threadIdx, blockIdx, blockDim and gridDim. //! - const unsigned kThread = vf::gpu::getNodeIndex(); + const unsigned nodeIndex = getNodeIndex(); ////////////////////////////////////////////////////////////////////////// // run for all indices in size_Mat and fluid nodes - if (kThread >= numberOfFluidNodes) + if (nodeIndex >= numberOfFluidNodes) return; //////////////////////////////////////////////////////////////////////////////// //! - Get the node index from the array containing all indices of fluid nodes //!
- const unsigned k_000 = fluidNodeIndices[kThread]; + const unsigned k_000 = fluidNodeIndices[nodeIndex]; ////////////////////////////////////////////////////////////////////////// //! - Read distributions: style of reading and writing the distributions from/to stored arrays dependent on @@ -107,7 +106,8 @@ __global__ void LB_Kernel_CumulantK17( //! href="https://doi.org/10.3390/computation5020019"><b>[ M. Geier et al. (2017), //! DOI:10.3390/computation5020019 ]</b></a> //! - Distributions27 dist = vf::gpu::getDistributionReferences27(distributions, numberOfLBnodes, isEvenTimestep); + Distributions27 dist; + getPointersToDistributions(dist, distributions, numberOfLBnodes, isEvenTimestep); //////////////////////////////////////////////////////////////////////////////// //! - Set neighbor indices (necessary for indirect addressing) diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17chim/CumulantK17CompChim_Device.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17chim/CumulantK17CompChim_Device.cu index 3eea267e55fee45111fb11cf1258559e2c3c63f2..a0db78d27b00372feab8490111183481abbec8b9 100644 --- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17chim/CumulantK17CompChim_Device.cu +++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17chim/CumulantK17CompChim_Device.cu @@ -33,11 +33,12 @@ /* Device code */ #include "LBM/LB.h" #include "lbm/constants/D3Q27.h" -#include <lbm/constants/NumericConstants.h> +#include "lbm/constants/NumericConstants.h" +#include "LBM/GPUHelperFunctions/ChimeraTransformation.h" using namespace vf::lbm::constant; using namespace vf::lbm::dir; -#include "Kernel/Utilities/ChimeraTransformation.h" +using namespace vf::gpu; //////////////////////////////////////////////////////////////////////////////// __global__ void LB_Kernel_CumulantK17CompChim( diff --git 
a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/RunLBMKernel.cuh b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/RunLBMKernel.cuh index 558b4f333e7c92b372a5097aa4917dd6d1230a34..3be594e3e39a57cd71741cd060e9dddda15d6035 100644 --- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/RunLBMKernel.cuh +++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/RunLBMKernel.cuh @@ -5,7 +5,7 @@ #include <DataTypes.h> #include <cuda_runtime.h> -#include <lbm/KernelParameter.h> +#include "lbm/KernelParameter.h" #include "Kernel/Utilities/DistributionHelper.cuh" @@ -23,7 +23,7 @@ struct GPUKernelParameter unsigned int* neighborY; unsigned int* neighborZ; real* distributions; - int size_Mat; + int numberOfLBnodes; real* forces; bool isEvenTimestep; }; @@ -31,19 +31,22 @@ struct GPUKernelParameter template<typename KernelFunctor> __global__ void runKernel(KernelFunctor kernel, GPUKernelParameter kernelParameter) { - const uint k = getNodeIndex(); + //////////////////////////////////////////////////////////////////////////////// + //! - Get node index coordinates from threadIdx, blockIdx, blockDim and gridDim. + //! 
+ const unsigned nodeIndex = getNodeIndex(); - if(k >= kernelParameter.size_Mat) + if(nodeIndex >= kernelParameter.numberOfLBnodes) return; - if (!isValidFluidNode(kernelParameter.typeOfGridNode[k])) + if (!isValidFluidNode(kernelParameter.typeOfGridNode[nodeIndex])) return; DistributionWrapper distributionWrapper { kernelParameter.distributions, - (unsigned int)kernelParameter.size_Mat, + (unsigned int)kernelParameter.numberOfLBnodes, kernelParameter.isEvenTimestep, - k, + nodeIndex, kernelParameter.neighborX, kernelParameter.neighborY, kernelParameter.neighborZ diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Utilities/ChimeraTransformation.h b/src/gpu/VirtualFluids_GPU/Kernel/Utilities/ChimeraTransformation.h deleted file mode 100644 index f7822d63fa0efd34b27773dffdeebddf521a8792..0000000000000000000000000000000000000000 --- a/src/gpu/VirtualFluids_GPU/Kernel/Utilities/ChimeraTransformation.h +++ /dev/null @@ -1,68 +0,0 @@ -#ifndef CHIMERA_TRANSFORMATION_H -#define CHIMERA_TRANSFORMATION_H - -#include <lbm/constants/NumericConstants.h> - -using namespace vf::lbm::constant; - -//////////////////////////////////////////////////////////////////////////////// -//! \brief forward chimera transformation \ref forwardInverseChimeraWithK -//! Transformation from distributions to central moments according to Eq. (6)-(14) in \ref -//! <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017), DOI:10.1016/j.jcp.2017.05.040 -//! ]</b></a> Modified for lower round-off errors. -inline __device__ void forwardInverseChimeraWithK(real &mfa, real &mfb, real &mfc, real vv, real v2, real Kinverse, real K) -{ - real m2 = mfa + mfc; - real m1 = mfc - mfa; - real m0 = m2 + mfb; - mfa = m0; - m0 *= Kinverse; - m0 += c1o1; - mfb = (m1 * Kinverse - m0 * vv) * K; - mfc = ((m2 - c2o1 * m1 * vv) * Kinverse + v2 * m0) * K; -} - -//////////////////////////////////////////////////////////////////////////////// -//! 
\brief backward chimera transformation \ref backwardInverseChimeraWithK -//! Transformation from central moments to distributions according to Eq. (57)-(65) in \ref -//! <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017), DOI:10.1016/j.jcp.2017.05.040 -//! ]</b></a> Modified for lower round-off errors. -inline __device__ void backwardInverseChimeraWithK(real &mfa, real &mfb, real &mfc, real vv, real v2, real Kinverse, real K) -{ - real m0 = (((mfc - mfb) * c1o2 + mfb * vv) * Kinverse + (mfa * Kinverse + c1o1) * (v2 - vv) * c1o2) * K; - real m1 = (((mfa - mfc) - c2o1 * mfb * vv) * Kinverse + (mfa * Kinverse + c1o1) * (-v2)) * K; - mfc = (((mfc + mfb) * c1o2 + mfb * vv) * Kinverse + (mfa * Kinverse + c1o1) * (v2 + vv) * c1o2) * K; - mfa = m0; - mfb = m1; -} - -//////////////////////////////////////////////////////////////////////////////// -//! \brief forward chimera transformation \ref forwardChimera -//! Transformation from distributions to central moments according to Eq. (6)-(14) in \ref -//! <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017), DOI:10.1016/j.jcp.2017.05.040 -//! ]</b></a> for \f$ K_{abc}=0 \f$. This is to avoid unnessary floating point operations. Modified for lower round-off -//! errors. -inline __device__ void forwardChimera(real &mfa, real &mfb, real &mfc, real vv, real v2) -{ - real m1 = (mfa + mfc) + mfb; - real m2 = mfc - mfa; - mfc = (mfc + mfa) + (v2 * m1 - c2o1 * vv * m2); - mfb = m2 - vv * m1; - mfa = m1; -} - -//////////////////////////////////////////////////////////////////////////////// -//! \brief backward chimera transformation \ref backwardChimera -//! Transformation from central moments to distributions according to Eq. (57)-(65) in \ref -//! <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017), DOI:10.1016/j.jcp.2017.05.040 -//! ]</b></a> for \f$ K_{abc}=0 \f$. This is to avoid unnessary floating point operations. 
Modified for lower round-off -//! errors. -inline __device__ void backwardChimera(real &mfa, real &mfb, real &mfc, real vv, real v2) -{ - real ma = (mfc + mfa * (v2 - vv)) * c1o2 + mfb * (vv - c1o2); - real mb = ((mfa - mfc) - mfa * v2) - c2o1 * mfb * vv; - mfc = (mfc + mfa * (v2 + vv)) * c1o2 + mfb * (vv + c1o2); - mfb = mb; - mfa = ma; -} -#endif \ No newline at end of file diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Utilities/DistributionHelper.cu b/src/gpu/VirtualFluids_GPU/Kernel/Utilities/DistributionHelper.cu index 7c477c539dc3526389dc22563b50501e778a63f3..240a6ffbace64147aa67224fe72c946761fdc452 100644 --- a/src/gpu/VirtualFluids_GPU/Kernel/Utilities/DistributionHelper.cu +++ b/src/gpu/VirtualFluids_GPU/Kernel/Utilities/DistributionHelper.cu @@ -2,8 +2,7 @@ #include <cuda_runtime.h> - -#include <lbm/constants/NumericConstants.h> +#include "lbm/constants/NumericConstants.h" #include "lbm/constants/D3Q27.h" using namespace vf::lbm::dir; @@ -80,10 +79,4 @@ __device__ void DistributionWrapper::write() (distribution_references.f[DIR_000])[k] = distribution.f[vf::lbm::dir::ZZZ]; } -__device__ bool isValidFluidNode(uint nodeType) -{ - return (nodeType == GEO_FLUID || nodeType == GEO_PM_0 || nodeType == GEO_PM_1 || nodeType == GEO_PM_2); -} - - } \ No newline at end of file diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Utilities/DistributionHelper.cuh b/src/gpu/VirtualFluids_GPU/Kernel/Utilities/DistributionHelper.cuh index fec2403ecad70d1ea550750a5a33780aa35e07bd..599f3f46668c07da49725770177d77239f8ef9df 100644 --- a/src/gpu/VirtualFluids_GPU/Kernel/Utilities/DistributionHelper.cuh +++ b/src/gpu/VirtualFluids_GPU/Kernel/Utilities/DistributionHelper.cuh @@ -37,76 +37,13 @@ #include "lbm/KernelParameter.h" #include "lbm/constants/D3Q27.h" +#include "LBM/GPUHelperFunctions/KernelUtilities.h" using namespace vf::lbm::dir; namespace vf::gpu { -__inline__ __device__ __host__ void getPointersToDistributions(Distributions27 &dist, real *distributionArray, const unsigned 
long long numberOfLBnodes, const bool isEvenTimestep) -{ - if (isEvenTimestep) - { - dist.f[DIR_000] = &distributionArray[DIR_000 * numberOfLBnodes]; - dist.f[DIR_P00] = &distributionArray[DIR_P00 * numberOfLBnodes]; - dist.f[DIR_M00] = &distributionArray[DIR_M00 * numberOfLBnodes]; - dist.f[DIR_0P0] = &distributionArray[DIR_0P0 * numberOfLBnodes]; - dist.f[DIR_0M0] = &distributionArray[DIR_0M0 * numberOfLBnodes]; - dist.f[DIR_00P] = &distributionArray[DIR_00P * numberOfLBnodes]; - dist.f[DIR_00M] = &distributionArray[DIR_00M * numberOfLBnodes]; - dist.f[DIR_PP0] = &distributionArray[DIR_PP0 * numberOfLBnodes]; - dist.f[DIR_MM0] = &distributionArray[DIR_MM0 * numberOfLBnodes]; - dist.f[DIR_PM0] = &distributionArray[DIR_PM0 * numberOfLBnodes]; - dist.f[DIR_MP0] = &distributionArray[DIR_MP0 * numberOfLBnodes]; - dist.f[DIR_P0P] = &distributionArray[DIR_P0P * numberOfLBnodes]; - dist.f[DIR_M0M] = &distributionArray[DIR_M0M * numberOfLBnodes]; - dist.f[DIR_P0M] = &distributionArray[DIR_P0M * numberOfLBnodes]; - dist.f[DIR_M0P] = &distributionArray[DIR_M0P * numberOfLBnodes]; - dist.f[DIR_0PP] = &distributionArray[DIR_0PP * numberOfLBnodes]; - dist.f[DIR_0MM] = &distributionArray[DIR_0MM * numberOfLBnodes]; - dist.f[DIR_0PM] = &distributionArray[DIR_0PM * numberOfLBnodes]; - dist.f[DIR_0MP] = &distributionArray[DIR_0MP * numberOfLBnodes]; - dist.f[DIR_PPP] = &distributionArray[DIR_PPP * numberOfLBnodes]; - dist.f[DIR_MMP] = &distributionArray[DIR_MMP * numberOfLBnodes]; - dist.f[DIR_PMP] = &distributionArray[DIR_PMP * numberOfLBnodes]; - dist.f[DIR_MPP] = &distributionArray[DIR_MPP * numberOfLBnodes]; - dist.f[DIR_PPM] = &distributionArray[DIR_PPM * numberOfLBnodes]; - dist.f[DIR_MMM] = &distributionArray[DIR_MMM * numberOfLBnodes]; - dist.f[DIR_PMM] = &distributionArray[DIR_PMM * numberOfLBnodes]; - dist.f[DIR_MPM] = &distributionArray[DIR_MPM * numberOfLBnodes]; - } - else - { - dist.f[DIR_M00] = &distributionArray[DIR_P00 * numberOfLBnodes]; - dist.f[DIR_P00] = 
&distributionArray[DIR_M00 * numberOfLBnodes]; - dist.f[DIR_0M0] = &distributionArray[DIR_0P0 * numberOfLBnodes]; - dist.f[DIR_0P0] = &distributionArray[DIR_0M0 * numberOfLBnodes]; - dist.f[DIR_00M] = &distributionArray[DIR_00P * numberOfLBnodes]; - dist.f[DIR_00P] = &distributionArray[DIR_00M * numberOfLBnodes]; - dist.f[DIR_MM0] = &distributionArray[DIR_PP0 * numberOfLBnodes]; - dist.f[DIR_PP0] = &distributionArray[DIR_MM0 * numberOfLBnodes]; - dist.f[DIR_MP0] = &distributionArray[DIR_PM0 * numberOfLBnodes]; - dist.f[DIR_PM0] = &distributionArray[DIR_MP0 * numberOfLBnodes]; - dist.f[DIR_M0M] = &distributionArray[DIR_P0P * numberOfLBnodes]; - dist.f[DIR_P0P] = &distributionArray[DIR_M0M * numberOfLBnodes]; - dist.f[DIR_M0P] = &distributionArray[DIR_P0M * numberOfLBnodes]; - dist.f[DIR_P0M] = &distributionArray[DIR_M0P * numberOfLBnodes]; - dist.f[DIR_0MM] = &distributionArray[DIR_0PP * numberOfLBnodes]; - dist.f[DIR_0PP] = &distributionArray[DIR_0MM * numberOfLBnodes]; - dist.f[DIR_0MP] = &distributionArray[DIR_0PM * numberOfLBnodes]; - dist.f[DIR_0PM] = &distributionArray[DIR_0MP * numberOfLBnodes]; - dist.f[DIR_000] = &distributionArray[DIR_000 * numberOfLBnodes]; - dist.f[DIR_PPP] = &distributionArray[DIR_MMM * numberOfLBnodes]; - dist.f[DIR_MMP] = &distributionArray[DIR_PPM * numberOfLBnodes]; - dist.f[DIR_PMP] = &distributionArray[DIR_MPM * numberOfLBnodes]; - dist.f[DIR_MPP] = &distributionArray[DIR_PMM * numberOfLBnodes]; - dist.f[DIR_PPM] = &distributionArray[DIR_MMP * numberOfLBnodes]; - dist.f[DIR_MMM] = &distributionArray[DIR_PPP * numberOfLBnodes]; - dist.f[DIR_PMM] = &distributionArray[DIR_MPP * numberOfLBnodes]; - dist.f[DIR_MPM] = &distributionArray[DIR_PMP * numberOfLBnodes]; - } -} - /** * Getting references to the 27 directions. 
* @params distributions 1D real* array containing all data (number of elements = 27 * matrix_size) @@ -157,20 +94,6 @@ struct DistributionWrapper const uint kbsw; }; -__inline__ __device__ unsigned int getNodeIndex() -{ - const unsigned x = threadIdx.x; - const unsigned y = blockIdx.x; - const unsigned z = blockIdx.y; - - const unsigned nx = blockDim.x; - const unsigned ny = gridDim.x; - - return nx * (ny * z + y) + x; -} - -__device__ bool isValidFluidNode(uint nodeType); - } #endif diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Utilities/ScalingHelperFunctions.h b/src/gpu/VirtualFluids_GPU/Kernel/Utilities/ScalingHelperFunctions.h deleted file mode 100644 index 13ce5d88aaa7cb49225fa914c1f59c2de05802f5..0000000000000000000000000000000000000000 --- a/src/gpu/VirtualFluids_GPU/Kernel/Utilities/ScalingHelperFunctions.h +++ /dev/null @@ -1,148 +0,0 @@ -//======================================================================================= -// ____ ____ __ ______ __________ __ __ __ __ -// \ \ | | | | | _ \ |___ ___| | | | | / \ | | -// \ \ | | | | | |_) | | | | | | | / \ | | -// \ \ | | | | | _ / | | | | | | / /\ \ | | -// \ \ | | | | | | \ \ | | | \__/ | / ____ \ | |____ -// \ \ | | |__| |__| \__\ |__| \________/ /__/ \__\ |_______| -// \ \ | | ________________________________________________________________ -// \ \ | | | ______________________________________________________________| -// \ \| | | | __ __ __ __ ______ _______ -// \ | | |_____ | | | | | | | | | _ \ / _____) -// \ | | _____| | | | | | | | | | | \ \ \_______ -// \ | | | | |_____ | \_/ | | | | |_/ / _____ | -// \ _____| |__| |________| \_______/ |__| |______/ (_______/ -// -// This file is part of VirtualFluids. VirtualFluids is free software: you can -// redistribute it and/or modify it under the terms of the GNU General Public -// License as published by the Free Software Foundation, either version 3 of -// the License, or (at your option) any later version. 
-// -// VirtualFluids is distributed in the hope that it will be useful, but WITHOUT -// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -// for more details. -// -// You should have received a copy of the GNU General Public License along -// with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>. -// -//! \file scalingHelperFunctions.h -//! \ingroup GPU/Kernel/Utilities -//! \author Martin Schoenherr, Anna Wellmann -//======================================================================================= - -#ifndef SCALING_HELPER_FUNCTIONS_H -#define SCALING_HELPER_FUNCTIONS_H - -#include "LBM/LB.h" -#include "lbm/constants/D3Q27.h" -#include "lbm/constants/NumericConstants.h" - -using namespace vf::lbm::constant; -using namespace vf::lbm::dir; - -__device__ __inline__ void calculateMomentsOnSourceNodes( - Distributions27& dist, - real& omega, - unsigned int& k_000, - unsigned int& k_M00, - unsigned int& k_0M0, - unsigned int& k_00M, - unsigned int& k_MM0, - unsigned int& k_M0M, - unsigned int& k_0MM, - unsigned int& k_MMM, - real& drho, - real& velocityX, - real& velocityY, - real& velocityZ, - real& kxyFromfcNEQ, - real& kyzFromfcNEQ, - real& kxzFromfcNEQ, - real& kxxMyyFromfcNEQ, - real& kxxMzzFromfcNEQ - ){ - //////////////////////////////////////////////////////////////////////////////////// - //! - Set local distributions (f's) on source nodes: - //! 
- real f_000 = (dist.f[DIR_000])[k_000]; - real f_P00 = (dist.f[DIR_P00])[k_000]; - real f_M00 = (dist.f[DIR_M00])[k_M00]; - real f_0P0 = (dist.f[DIR_0P0])[k_000]; - real f_0M0 = (dist.f[DIR_0M0])[k_0M0]; - real f_00P = (dist.f[DIR_00P])[k_000]; - real f_00M = (dist.f[DIR_00M])[k_00M]; - real f_PP0 = (dist.f[DIR_PP0])[k_000]; - real f_MM0 = (dist.f[DIR_MM0])[k_MM0]; - real f_PM0 = (dist.f[DIR_PM0])[k_0M0]; - real f_MP0 = (dist.f[DIR_MP0])[k_M00]; - real f_P0P = (dist.f[DIR_P0P])[k_000]; - real f_M0M = (dist.f[DIR_M0M])[k_M0M]; - real f_P0M = (dist.f[DIR_P0M])[k_00M]; - real f_M0P = (dist.f[DIR_M0P])[k_M00]; - real f_0PP = (dist.f[DIR_0PP])[k_000]; - real f_0MM = (dist.f[DIR_0MM])[k_0MM]; - real f_0PM = (dist.f[DIR_0PM])[k_00M]; - real f_0MP = (dist.f[DIR_0MP])[k_0M0]; - real f_PPP = (dist.f[DIR_PPP])[k_000]; - real f_MPP = (dist.f[DIR_MPP])[k_M00]; - real f_PMP = (dist.f[DIR_PMP])[k_0M0]; - real f_MMP = (dist.f[DIR_MMP])[k_MM0]; - real f_PPM = (dist.f[DIR_PPM])[k_00M]; - real f_MPM = (dist.f[DIR_MPM])[k_M0M]; - real f_PMM = (dist.f[DIR_PMM])[k_0MM]; - real f_MMM = (dist.f[DIR_MMM])[k_MMM]; - - //////////////////////////////////////////////////////////////////////////////////// - //! - Calculate density and velocity using pyramid summation for low round-off errors as in Eq. (J1)-(J3) \ref - //! <a href="https://doi.org/10.1016/j.camwa.2015.05.001"><b>[ M. Geier et al. (2015), - //! DOI:10.1016/j.camwa.2015.05.001 ]</b></a> - //! 
- drho = ((((f_PPP + f_MMM) + (f_MPM + f_PMP)) + ((f_MPP + f_PMM) + (f_MMP + f_PPM))) + - (((f_0MP + f_0PM) + (f_0MM + f_0PP)) + ((f_M0P + f_P0M) + (f_M0M + f_P0P)) + - ((f_MP0 + f_PM0) + (f_MM0 + f_PP0))) + - ((f_M00 + f_P00) + (f_0M0 + f_0P0) + (f_00M + f_00P))) + - f_000; - - real oneOverRho = c1o1 / (c1o1 + drho); - - velocityX = ((((f_PPP - f_MMM) + (f_PMP - f_MPM)) + ((f_PMM - f_MPP) + (f_PPM - f_MMP))) + - (((f_P0M - f_M0P) + (f_P0P - f_M0M)) + ((f_PM0 - f_MP0) + (f_PP0 - f_MM0))) + (f_P00 - f_M00)) * - oneOverRho; - velocityY = ((((f_PPP - f_MMM) + (f_MPM - f_PMP)) + ((f_MPP - f_PMM) + (f_PPM - f_MMP))) + - (((f_0PM - f_0MP) + (f_0PP - f_0MM)) + ((f_MP0 - f_PM0) + (f_PP0 - f_MM0))) + (f_0P0 - f_0M0)) * - oneOverRho; - velocityZ = ((((f_PPP - f_MMM) + (f_PMP - f_MPM)) + ((f_MPP - f_PMM) + (f_MMP - f_PPM))) + - (((f_0MP - f_0PM) + (f_0PP - f_0MM)) + ((f_M0P - f_P0M) + (f_P0P - f_M0M))) + (f_00P - f_00M)) * - oneOverRho; - - //////////////////////////////////////////////////////////////////////////////////// - //! - Calculate second order moments for interpolation - //! 
- // example: kxxMzz: moment, second derivative in x direction minus the second derivative in z direction - kxyFromfcNEQ = - -c3o1 * omega * - ((f_MM0 + f_MMM + f_MMP - f_MP0 - f_MPM - f_MPP - f_PM0 - f_PMM - f_PMP + f_PP0 + f_PPM + f_PPP) / - (c1o1 + drho) - - ((velocityX * velocityY))); - kyzFromfcNEQ = - -c3o1 * omega * - ((f_0MM + f_PMM + f_MMM - f_0MP - f_PMP - f_MMP - f_0PM - f_PPM - f_MPM + f_0PP + f_PPP + f_MPP) / - (c1o1 + drho) - - ((velocityY * velocityZ))); - kxzFromfcNEQ = - -c3o1 * omega * - ((f_M0M + f_MMM + f_MPM - f_M0P - f_MMP - f_MPP - f_P0M - f_PMM - f_PPM + f_P0P + f_PMP + f_PPP) / - (c1o1 + drho) - - ((velocityX * velocityZ))); - kxxMyyFromfcNEQ = - -c3o2 * omega * - ((f_M0M + f_M00 + f_M0P - f_0MM - f_0M0 - f_0MP - f_0PM - f_0P0 - f_0PP + f_P0M + f_P00 + f_P0P) / (c1o1 + drho) - - ((velocityX * velocityX - velocityY * velocityY))); - kxxMzzFromfcNEQ = - -c3o2 * omega * - ((f_MM0 + f_M00 + f_MP0 - f_0MM - f_0MP - f_00M - f_00P - f_0PM - f_0PP + f_PM0 + f_P00 + f_PP0) / (c1o1 + drho) - - ((velocityX * velocityX - velocityZ * velocityZ))); -} - -#endif \ No newline at end of file diff --git a/src/gpu/VirtualFluids_GPU/LBM/GPUHelperFunctions/ChimeraTransformation.h b/src/gpu/VirtualFluids_GPU/LBM/GPUHelperFunctions/ChimeraTransformation.h new file mode 100644 index 0000000000000000000000000000000000000000..225f615ec3ad2d8ef11ec295f8d9e8a4166d99fe --- /dev/null +++ b/src/gpu/VirtualFluids_GPU/LBM/GPUHelperFunctions/ChimeraTransformation.h @@ -0,0 +1,108 @@ +//======================================================================================= +// ____ ____ __ ______ __________ __ __ __ __ +// \ \ | | | | | _ \ |___ ___| | | | | / \ | | +// \ \ | | | | | |_) | | | | | | | / \ | | +// \ \ | | | | | _ / | | | | | | / /\ \ | | +// \ \ | | | | | | \ \ | | | \__/ | / ____ \ | |____ +// \ \ | | |__| |__| \__\ |__| \________/ /__/ \__\ |_______| +// \ \ | | ________________________________________________________________ +// \ \ | | | 
______________________________________________________________| +// \ \| | | | __ __ __ __ ______ _______ +// \ | | |_____ | | | | | | | | | _ \ / _____) +// \ | | _____| | | | | | | | | | | \ \ \_______ +// \ | | | | |_____ | \_/ | | | | |_/ / _____ | +// \ _____| |__| |________| \_______/ |__| |______/ (_______/ +// +// This file is part of VirtualFluids. VirtualFluids is free software: you can +// redistribute it and/or modify it under the terms of the GNU General Public +// License as published by the Free Software Foundation, either version 3 of +// the License, or (at your option) any later version. +// +// VirtualFluids is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// for more details. +// +// You should have received a copy of the GNU General Public License along +// with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>. +// +//! \file ChimeraTransformation.h +//! \ingroup LBM/GPUHelperFunctions +//! \author Martin Schoenherr, Anna Wellmann, Soeren Peters +//======================================================================================= +#ifndef CHIMERA_TRANSFORMATION_H +#define CHIMERA_TRANSFORMATION_H + +#include "LBM/LB.h" + +#include <lbm/constants/NumericConstants.h> + +using namespace vf::lbm::constant; + +namespace vf::gpu +{ + +//////////////////////////////////////////////////////////////////////////////// +//! \brief forward chimera transformation \ref forwardInverseChimeraWithK +//! Transformation from distributions to central moments according to Eq. (6)-(14) in \ref +//! <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017), DOI:10.1016/j.jcp.2017.05.040 +//! ]</b></a> Modified for lower round-off errors. 
+__inline__ __device__ void forwardInverseChimeraWithK(real &mfa, real &mfb, real &mfc, real vv, real v2, real Kinverse, real K) +{ + real m2 = mfa + mfc; + real m1 = mfc - mfa; + real m0 = m2 + mfb; + mfa = m0; + m0 *= Kinverse; + m0 += c1o1; + mfb = (m1 * Kinverse - m0 * vv) * K; + mfc = ((m2 - c2o1 * m1 * vv) * Kinverse + v2 * m0) * K; +} + +//////////////////////////////////////////////////////////////////////////////// +//! \brief backward chimera transformation \ref backwardInverseChimeraWithK +//! Transformation from central moments to distributions according to Eq. (57)-(65) in \ref +//! <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017), DOI:10.1016/j.jcp.2017.05.040 +//! ]</b></a> Modified for lower round-off errors. +__inline__ __device__ void backwardInverseChimeraWithK(real &mfa, real &mfb, real &mfc, real vv, real v2, real Kinverse, real K) +{ + real m0 = (((mfc - mfb) * c1o2 + mfb * vv) * Kinverse + (mfa * Kinverse + c1o1) * (v2 - vv) * c1o2) * K; + real m1 = (((mfa - mfc) - c2o1 * mfb * vv) * Kinverse + (mfa * Kinverse + c1o1) * (-v2)) * K; + mfc = (((mfc + mfb) * c1o2 + mfb * vv) * Kinverse + (mfa * Kinverse + c1o1) * (v2 + vv) * c1o2) * K; + mfa = m0; + mfb = m1; +} + +//////////////////////////////////////////////////////////////////////////////// +//! \brief forward chimera transformation \ref forwardChimera +//! Transformation from distributions to central moments according to Eq. (6)-(14) in \ref +//! <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017), DOI:10.1016/j.jcp.2017.05.040 +//! ]</b></a> for \f$ K_{abc}=0 \f$. This is to avoid unnecessary floating point operations. Modified for lower round-off +//! errors.
+__inline__ __device__ void forwardChimera(real &mfa, real &mfb, real &mfc, real vv, real v2) +{ + real m1 = (mfa + mfc) + mfb; + real m2 = mfc - mfa; + mfc = (mfc + mfa) + (v2 * m1 - c2o1 * vv * m2); + mfb = m2 - vv * m1; + mfa = m1; +} + +//////////////////////////////////////////////////////////////////////////////// +//! \brief backward chimera transformation \ref backwardChimera +//! Transformation from central moments to distributions according to Eq. (57)-(65) in \ref +//! <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017), DOI:10.1016/j.jcp.2017.05.040 +//! ]</b></a> for \f$ K_{abc}=0 \f$. This is to avoid unnecessary floating point operations. Modified for lower round-off +//! errors. +__inline__ __device__ void backwardChimera(real &mfa, real &mfb, real &mfc, real vv, real v2) +{ + real ma = (mfc + mfa * (v2 - vv)) * c1o2 + mfb * (vv - c1o2); + real mb = ((mfa - mfc) - mfa * v2) - c2o1 * mfb * vv; + mfc = (mfc + mfa * (v2 + vv)) * c1o2 + mfb * (vv + c1o2); + mfb = mb; + mfa = ma; +} + +} // namespace vf::gpu + +#endif diff --git a/src/gpu/VirtualFluids_GPU/GPU/KernelUtilities.h b/src/gpu/VirtualFluids_GPU/LBM/GPUHelperFunctions/KernelUtilities.h similarity index 91% rename from src/gpu/VirtualFluids_GPU/GPU/KernelUtilities.h rename to src/gpu/VirtualFluids_GPU/LBM/GPUHelperFunctions/KernelUtilities.h index 05f48ce46cf251ee573930e54209f87ed8b85b07..37208ee59586533fa7f8ffbc269246826ed27fb8 100644 --- a/src/gpu/VirtualFluids_GPU/GPU/KernelUtilities.h +++ b/src/gpu/VirtualFluids_GPU/LBM/GPUHelperFunctions/KernelUtilities.h @@ -27,11 +27,11 @@ // with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>. // //! \file KernelUtilities.h -//! \ingroup GPU -//! \author Martin Schoenherr, Anna Wellmann -//====================================================================================== -#ifndef KERNELUTILS_H -#define KERNELUTILS_H +//! \ingroup LBM/GPUHelperFunctions +//!
\author Martin Schoenherr, Anna Wellmann, Soeren Peters +//======================================================================================= +#ifndef KERNEL_UTILITIES_H +#define KERNEL_UTILITIES_H #include "LBM/LB.h" #include "lbm/constants/D3Q27.h" @@ -40,10 +40,14 @@ using namespace vf::lbm::constant; using namespace vf::lbm::dir; -__inline__ __device__ void getPointersToDistributions(Distributions27 &dist, real *distributionArray, const unsigned long long numberOfLBnodes, const bool isEvenTimestep) +namespace vf::gpu +{ + +__inline__ __device__ __host__ void getPointersToDistributions(Distributions27 &dist, real *distributionArray, const unsigned long long numberOfLBnodes, const bool isEvenTimestep) { if (isEvenTimestep) { + dist.f[DIR_000] = &distributionArray[DIR_000 * numberOfLBnodes]; dist.f[DIR_P00] = &distributionArray[DIR_P00 * numberOfLBnodes]; dist.f[DIR_M00] = &distributionArray[DIR_M00 * numberOfLBnodes]; dist.f[DIR_0P0] = &distributionArray[DIR_0P0 * numberOfLBnodes]; @@ -62,7 +66,6 @@ __inline__ __device__ void getPointersToDistributions(Distributions27 &dist, rea dist.f[DIR_0MM] = &distributionArray[DIR_0MM * numberOfLBnodes]; dist.f[DIR_0PM] = &distributionArray[DIR_0PM * numberOfLBnodes]; dist.f[DIR_0MP] = &distributionArray[DIR_0MP * numberOfLBnodes]; - dist.f[DIR_000] = &distributionArray[DIR_000 * numberOfLBnodes]; dist.f[DIR_PPP] = &distributionArray[DIR_PPP * numberOfLBnodes]; dist.f[DIR_MMP] = &distributionArray[DIR_MMP * numberOfLBnodes]; dist.f[DIR_PMP] = &distributionArray[DIR_PMP * numberOfLBnodes]; @@ -140,38 +143,56 @@ __inline__ __device__ real getEquilibriumForBC(const real& drho, const real& vel return weight * (drho + c9o2 * velocity * velocity * (c1o1 + drho) - cu_sq); } -__inline__ __device__ real getInterpolatedDistributionForVeloBC(const real& q, const real& f, const real& fInverse, const real& feq, +__inline__ __device__ real getInterpolatedDistributionForVeloBC(const real& q, const real& f, const real& fInverse, const 
real& feq, const real& omega, const real& velocity, const real weight) { - return (c1o1-q) / (c1o1+q) * (f - fInverse + (f + fInverse - c2o1 * feq * omega) / (c1o1 - omega)) * c1o2 + return (c1o1-q) / (c1o1+q) * (f - fInverse + (f + fInverse - c2o1 * feq * omega) / (c1o1 - omega)) * c1o2 + (q * (f + fInverse) - c6o1 * weight * velocity) / (c1o1 + q); } -__inline__ __device__ real getBounceBackDistributionForVeloBC( const real& f, +__inline__ __device__ real getBounceBackDistributionForVeloBC( const real& f, const real& velocity, const real weight) { return f - (c6o1 * weight * velocity); } -__inline__ __device__ real getInterpolatedDistributionForNoSlipBC(const real& q, const real& f, const real& fInverse, const real& feq, +__inline__ __device__ real getInterpolatedDistributionForNoSlipBC(const real& q, const real& f, const real& fInverse, const real& feq, const real& omega) { - return (c1o1-q) / (c1o1+q) * (f - fInverse + (f + fInverse - c2o1 * feq * omega) / (c1o1 - omega)) * c1o2 + return (c1o1-q) / (c1o1+q) * (f - fInverse + (f + fInverse - c2o1 * feq * omega) / (c1o1 - omega)) * c1o2 + (q * (f + fInverse)) / (c1o1 + q); } -__inline__ __device__ real getInterpolatedDistributionForVeloWithPressureBC(const real& q, const real& f, const real& fInverse, const real& feq, +__inline__ __device__ real getInterpolatedDistributionForVeloWithPressureBC(const real& q, const real& f, const real& fInverse, const real& feq, const real& omega, const real& drho, const real& velocity, const real weight) { - return (c1o1-q) / (c1o1+q) * (f - fInverse + (f + fInverse - c2o1 * feq * omega) / (c1o1 - omega)) * c1o2 + return (c1o1-q) / (c1o1+q) * (f - fInverse + (f + fInverse - c2o1 * feq * omega) / (c1o1 - omega)) * c1o2 + (q * (f + fInverse) - c6o1 * weight * velocity) / (c1o1 + q) - weight * drho; } +__inline__ __device__ unsigned int getNodeIndex() +{ + const unsigned x = threadIdx.x; + const unsigned y = blockIdx.x; + const unsigned z = blockIdx.y; + + const unsigned nx = 
blockDim.x; + const unsigned ny = gridDim.x; + + return nx * (ny * z + y) + x; +} + +__inline__ __device__ bool isValidFluidNode(uint nodeType) +{ + return (nodeType == GEO_FLUID || nodeType == GEO_PM_0 || nodeType == GEO_PM_1 || nodeType == GEO_PM_2); +} + +} #endif diff --git a/src/gpu/VirtualFluids_GPU/LBM/GPUHelperFunctions/ScalingUtilities.h b/src/gpu/VirtualFluids_GPU/LBM/GPUHelperFunctions/ScalingUtilities.h new file mode 100644 index 0000000000000000000000000000000000000000..53990e452be06dc6840c801816e8231d26861e2e --- /dev/null +++ b/src/gpu/VirtualFluids_GPU/LBM/GPUHelperFunctions/ScalingUtilities.h @@ -0,0 +1,136 @@ +//======================================================================================= +// ____ ____ __ ______ __________ __ __ __ __ +// \ \ | | | | | _ \ |___ ___| | | | | / \ | | +// \ \ | | | | | |_) | | | | | | | / \ | | +// \ \ | | | | | _ / | | | | | | / /\ \ | | +// \ \ | | | | | | \ \ | | | \__/ | / ____ \ | |____ +// \ \ | | |__| |__| \__\ |__| \________/ /__/ \__\ |_______| +// \ \ | | ________________________________________________________________ +// \ \ | | | ______________________________________________________________| +// \ \| | | | __ __ __ __ ______ _______ +// \ | | |_____ | | | | | | | | | _ \ / _____) +// \ | | _____| | | | | | | | | | | \ \ \_______ +// \ | | | | |_____ | \_/ | | | | |_/ / _____ | +// \ _____| |__| |________| \_______/ |__| |______/ (_______/ +// +// This file is part of VirtualFluids. VirtualFluids is free software: you can +// redistribute it and/or modify it under the terms of the GNU General Public +// License as published by the Free Software Foundation, either version 3 of +// the License, or (at your option) any later version. +// +// VirtualFluids is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// for more details. 
+// +// You should have received a copy of the GNU General Public License along +// with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>. +// +//! \file ScalingUtilities.h +//! \ingroup LBM/GPUHelperFunctions +//! \author Martin Schoenherr, Anna Wellmann +//======================================================================================= +#ifndef SCALING_HELPER_FUNCTIONS_H +#define SCALING_HELPER_FUNCTIONS_H + +#include "LBM/LB.h" +#include "lbm/constants/D3Q27.h" +#include "lbm/constants/NumericConstants.h" + +using namespace vf::lbm::constant; +using namespace vf::lbm::dir; + +namespace vf::gpu +{ + +__device__ __inline__ void calculateMomentsOnSourceNodes(Distributions27 &dist, real &omega, unsigned int &k_000, + unsigned int &k_M00, unsigned int &k_0M0, unsigned int &k_00M, + unsigned int &k_MM0, unsigned int &k_M0M, unsigned int &k_0MM, + unsigned int &k_MMM, real &drho, real &velocityX, + real &velocityY, real &velocityZ, real &kxyFromfcNEQ, + real &kyzFromfcNEQ, real &kxzFromfcNEQ, real &kxxMyyFromfcNEQ, + real &kxxMzzFromfcNEQ) +{ + //////////////////////////////////////////////////////////////////////////////////// + //! - Set local distributions (f's) on source nodes: + //! 
+ real f_000 = (dist.f[DIR_000])[k_000]; + real f_P00 = (dist.f[DIR_P00])[k_000]; + real f_M00 = (dist.f[DIR_M00])[k_M00]; + real f_0P0 = (dist.f[DIR_0P0])[k_000]; + real f_0M0 = (dist.f[DIR_0M0])[k_0M0]; + real f_00P = (dist.f[DIR_00P])[k_000]; + real f_00M = (dist.f[DIR_00M])[k_00M]; + real f_PP0 = (dist.f[DIR_PP0])[k_000]; + real f_MM0 = (dist.f[DIR_MM0])[k_MM0]; + real f_PM0 = (dist.f[DIR_PM0])[k_0M0]; + real f_MP0 = (dist.f[DIR_MP0])[k_M00]; + real f_P0P = (dist.f[DIR_P0P])[k_000]; + real f_M0M = (dist.f[DIR_M0M])[k_M0M]; + real f_P0M = (dist.f[DIR_P0M])[k_00M]; + real f_M0P = (dist.f[DIR_M0P])[k_M00]; + real f_0PP = (dist.f[DIR_0PP])[k_000]; + real f_0MM = (dist.f[DIR_0MM])[k_0MM]; + real f_0PM = (dist.f[DIR_0PM])[k_00M]; + real f_0MP = (dist.f[DIR_0MP])[k_0M0]; + real f_PPP = (dist.f[DIR_PPP])[k_000]; + real f_MPP = (dist.f[DIR_MPP])[k_M00]; + real f_PMP = (dist.f[DIR_PMP])[k_0M0]; + real f_MMP = (dist.f[DIR_MMP])[k_MM0]; + real f_PPM = (dist.f[DIR_PPM])[k_00M]; + real f_MPM = (dist.f[DIR_MPM])[k_M0M]; + real f_PMM = (dist.f[DIR_PMM])[k_0MM]; + real f_MMM = (dist.f[DIR_MMM])[k_MMM]; + + //////////////////////////////////////////////////////////////////////////////////// + //! - Calculate density and velocity using pyramid summation for low round-off errors as in Eq. (J1)-(J3) \ref + //! <a href="https://doi.org/10.1016/j.camwa.2015.05.001"><b>[ M. Geier et al. (2015), + //! DOI:10.1016/j.camwa.2015.05.001 ]</b></a> + //! 
+ drho = ((((f_PPP + f_MMM) + (f_MPM + f_PMP)) + ((f_MPP + f_PMM) + (f_MMP + f_PPM))) + + (((f_0MP + f_0PM) + (f_0MM + f_0PP)) + ((f_M0P + f_P0M) + (f_M0M + f_P0P)) + + ((f_MP0 + f_PM0) + (f_MM0 + f_PP0))) + + ((f_M00 + f_P00) + (f_0M0 + f_0P0) + (f_00M + f_00P))) + + f_000; + + real oneOverRho = c1o1 / (c1o1 + drho); + + velocityX = ((((f_PPP - f_MMM) + (f_PMP - f_MPM)) + ((f_PMM - f_MPP) + (f_PPM - f_MMP))) + + (((f_P0M - f_M0P) + (f_P0P - f_M0M)) + ((f_PM0 - f_MP0) + (f_PP0 - f_MM0))) + (f_P00 - f_M00)) * + oneOverRho; + velocityY = ((((f_PPP - f_MMM) + (f_MPM - f_PMP)) + ((f_MPP - f_PMM) + (f_PPM - f_MMP))) + + (((f_0PM - f_0MP) + (f_0PP - f_0MM)) + ((f_MP0 - f_PM0) + (f_PP0 - f_MM0))) + (f_0P0 - f_0M0)) * + oneOverRho; + velocityZ = ((((f_PPP - f_MMM) + (f_PMP - f_MPM)) + ((f_MPP - f_PMM) + (f_MMP - f_PPM))) + + (((f_0MP - f_0PM) + (f_0PP - f_0MM)) + ((f_M0P - f_P0M) + (f_P0P - f_M0M))) + (f_00P - f_00M)) * + oneOverRho; + + //////////////////////////////////////////////////////////////////////////////////// + //! - Calculate second order moments for interpolation + //! 
+ // example: kxxMzz: moment, second derivative in x direction minus the second derivative in z direction + kxyFromfcNEQ = -c3o1 * omega * + ((f_MM0 + f_MMM + f_MMP - f_MP0 - f_MPM - f_MPP - f_PM0 - f_PMM - f_PMP + f_PP0 + f_PPM + f_PPP) / + (c1o1 + drho) - + ((velocityX * velocityY))); + kyzFromfcNEQ = -c3o1 * omega * + ((f_0MM + f_PMM + f_MMM - f_0MP - f_PMP - f_MMP - f_0PM - f_PPM - f_MPM + f_0PP + f_PPP + f_MPP) / + (c1o1 + drho) - + ((velocityY * velocityZ))); + kxzFromfcNEQ = -c3o1 * omega * + ((f_M0M + f_MMM + f_MPM - f_M0P - f_MMP - f_MPP - f_P0M - f_PMM - f_PPM + f_P0P + f_PMP + f_PPP) / + (c1o1 + drho) - + ((velocityX * velocityZ))); + kxxMyyFromfcNEQ = -c3o2 * omega * + ((f_M0M + f_M00 + f_M0P - f_0MM - f_0M0 - f_0MP - f_0PM - f_0P0 - f_0PP + f_P0M + f_P00 + f_P0P) / + (c1o1 + drho) - + ((velocityX * velocityX - velocityY * velocityY))); + kxxMzzFromfcNEQ = -c3o2 * omega * + ((f_MM0 + f_M00 + f_MP0 - f_0MM - f_0MP - f_00M - f_00P - f_0PM - f_0PP + f_PM0 + f_P00 + f_PP0) / + (c1o1 + drho) - + ((velocityX * velocityX - velocityZ * velocityZ))); +} + +} // namespace vf::gpu + +#endif diff --git a/src/gpu/VirtualFluids_GPU/Output/DistributionDebugInspector.cu b/src/gpu/VirtualFluids_GPU/Output/DistributionDebugInspector.cu index ee3fe322da2ab7a7ea218b27287f2dd5d5d0fd24..f7bb2e680c0fb3ea597239ee0cbc1772f2efe81b 100644 --- a/src/gpu/VirtualFluids_GPU/Output/DistributionDebugInspector.cu +++ b/src/gpu/VirtualFluids_GPU/Output/DistributionDebugInspector.cu @@ -1,11 +1,41 @@ - +//======================================================================================= +// ____ ____ __ ______ __________ __ __ __ __ +// \ \ | | | | | _ \ |___ ___| | | | | / \ | | +// \ \ | | | | | |_) | | | | | | | / \ | | +// \ \ | | | | | _ / | | | | | | / /\ \ | | +// \ \ | | | | | | \ \ | | | \__/ | / ____ \ | |____ +// \ \ | | |__| |__| \__\ |__| \________/ /__/ \__\ |_______| +// \ \ | | ________________________________________________________________ +// \ \ | | | 
______________________________________________________________| +// \ \| | | | __ __ __ __ ______ _______ +// \ | | |_____ | | | | | | | | | _ \ / _____) +// \ | | _____| | | | | | | | | | | \ \ \_______ +// \ | | | | |_____ | \_/ | | | | |_/ / _____ | +// \ _____| |__| |________| \_______/ |__| |______/ (_______/ +// +// This file is part of VirtualFluids. VirtualFluids is free software: you can +// redistribute it and/or modify it under the terms of the GNU General Public +// License as published by the Free Software Foundation, either version 3 of +// the License, or (at your option) any later version. +// +// VirtualFluids is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// for more details. +// +// You should have received a copy of the GNU General Public License along +// with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>. +// +//! \file DistributionDebugInspector.cu +//! \ingroup Output +//! 
\author Henrik Asmuth, Henry Korb +//====================================================================================== #include "DistributionDebugInspector.h" #include "Parameter/Parameter.h" -#include "LBM/LB.h" #include "lbm/constants/D3Q27.h" -#include <lbm/constants/NumericConstants.h> -#include "Kernel/Utilities/DistributionHelper.cuh" +#include "lbm/constants/NumericConstants.h" +#include "LBM/GPUHelperFunctions/KernelUtilities.h" #include <cuda/CudaGrid.h> #include <cuda.h> @@ -14,108 +44,114 @@ using namespace vf::lbm::constant; using namespace vf::lbm::dir; +using namespace vf::gpu; + +__global__ void printFs( + real* distributions, + bool isEvenTimestep, + unsigned long long numberOfFluidNodes, + uint* neighborX, + uint* neighborY, + uint* neighborZ, + uint* typeOfGridNode, + real* coordX, + real* coordY, + real* coordZ, + real minX, + real maxX, + real minY, + real maxY, + real minZ, + real maxZ) +{ + //////////////////////////////////////////////////////////////////////////////// + //! - Get node index coordinates from threadIdx, blockIdx, blockDim and gridDim. + //! 
+ const unsigned k_000 = getNodeIndex(); + + if (k_000 >= numberOfFluidNodes || typeOfGridNode[k_000]!=GEO_FLUID ) + return; -__global__ void printFs( real* distributions, - bool isEvenTimestep, - unsigned long long numberOfFluidNodes, - uint* neighborX, - uint* neighborY, - uint* neighborZ, - uint* typeOfGridNode, - real* coordX, - real* coordY, - real* coordZ, - real minX, - real maxX, - real minY, - real maxY, - real minZ, - real maxZ) - { - const unsigned k_000 = vf::gpu::getNodeIndex(); - - if (k_000 >= numberOfFluidNodes || typeOfGridNode[k_000]!=GEO_FLUID ) - return; - - real coordNodeX = coordX[k_000]; - real coordNodeY = coordY[k_000]; - real coordNodeZ = coordZ[k_000]; - - if( coordNodeX>=minX && coordNodeX<=maxX && - coordNodeY>=minY && coordNodeY<=maxY && - coordNodeZ>=minZ && coordNodeZ<=maxZ ) - { - Distributions27 dist = vf::gpu::getDistributionReferences27(distributions, numberOfFluidNodes, isEvenTimestep); - //////////////////////////////////////////////////////////////////////////////// - //! - Set neighbor indices (necessary for indirect addressing) - uint k_M00 = neighborX[k_000]; - uint k_0M0 = neighborY[k_000]; - uint k_00M = neighborZ[k_000]; - uint k_MM0 = neighborY[k_M00]; - uint k_M0M = neighborZ[k_M00]; - uint k_0MM = neighborZ[k_0M0]; - uint k_MMM = neighborZ[k_MM0]; - //////////////////////////////////////////////////////////////////////////////////// - //! - Set local distributions - //! 
- real f_000 = (dist.f[DIR_000])[k_000]; - real f_P00 = (dist.f[DIR_P00])[k_000]; - real f_M00 = (dist.f[DIR_M00])[k_M00]; - real f_0P0 = (dist.f[DIR_0P0])[k_000]; - real f_0M0 = (dist.f[DIR_0M0])[k_0M0]; - real f_00P = (dist.f[DIR_00P])[k_000]; - real f_00M = (dist.f[DIR_00M])[k_00M]; - real f_PP0 = (dist.f[DIR_PP0])[k_000]; - real f_MM0 = (dist.f[DIR_MM0])[k_MM0]; - real f_PM0 = (dist.f[DIR_PM0])[k_0M0]; - real f_MP0 = (dist.f[DIR_MP0])[k_M00]; - real f_P0P = (dist.f[DIR_P0P])[k_000]; - real f_M0M = (dist.f[DIR_M0M])[k_M0M]; - real f_P0M = (dist.f[DIR_P0M])[k_00M]; - real f_M0P = (dist.f[DIR_M0P])[k_M00]; - real f_0PP = (dist.f[DIR_0PP])[k_000]; - real f_0MM = (dist.f[DIR_0MM])[k_0MM]; - real f_0PM = (dist.f[DIR_0PM])[k_00M]; - real f_0MP = (dist.f[DIR_0MP])[k_0M0]; - real f_PPP = (dist.f[DIR_PPP])[k_000]; - real f_MPP = (dist.f[DIR_MPP])[k_M00]; - real f_PMP = (dist.f[DIR_PMP])[k_0M0]; - real f_MMP = (dist.f[DIR_MMP])[k_MM0]; - real f_PPM = (dist.f[DIR_PPM])[k_00M]; - real f_MPM = (dist.f[DIR_MPM])[k_M0M]; - real f_PMM = (dist.f[DIR_PMM])[k_0MM]; - real f_MMM = (dist.f[DIR_MMM])[k_MMM]; - - real drho = ((((f_PPP + f_MMM) + (f_MPM + f_PMP)) + ((f_MPP + f_PMM) + (f_MMP + f_PPM))) + - (((f_0MP + f_0PM) + (f_0MM + f_0PP)) + ((f_M0P + f_P0M) + (f_M0M + f_P0P)) + - ((f_MP0 + f_PM0) + (f_MM0 + f_PP0))) + - ((f_M00 + f_P00) + (f_0M0 + f_0P0) + (f_00M + f_00P))) + - f_000; - - real oneOverRho = c1o1 / (c1o1 + drho); - - real vvx = ((((f_PPP - f_MMM) + (f_PMP - f_MPM)) + ((f_PMM - f_MPP) + (f_PPM - f_MMP))) + - (((f_P0M - f_M0P) + (f_P0P - f_M0M)) + ((f_PM0 - f_MP0) + (f_PP0 - f_MM0))) + (f_P00 - f_M00)) * - oneOverRho; - real vvy = ((((f_PPP - f_MMM) + (f_MPM - f_PMP)) + ((f_MPP - f_PMM) + (f_PPM - f_MMP))) + - (((f_0PM - f_0MP) + (f_0PP - f_0MM)) + ((f_MP0 - f_PM0) + (f_PP0 - f_MM0))) + (f_0P0 - f_0M0)) * - oneOverRho; - real vvz = ((((f_PPP - f_MMM) + (f_PMP - f_MPM)) + ((f_MPP - f_PMM) + (f_MMP - f_PPM))) + - (((f_0MP - f_0PM) + (f_0PP - f_0MM)) + ((f_M0P - f_P0M) + 
(f_P0P - f_M0M))) + (f_00P - f_00M)) * - oneOverRho; - - printf("Node %u \t (%f\t%f\t%f)\n rho: %f\t velo: %f\t %f \t %f\n\n" , k_000, coordNodeX, coordNodeY, coordNodeZ, drho, vvx, vvy, vvz); - printf("Node %u \t (%f\t%f\t%f)\n f_M00\t%f\t f_000\t%f\t f_P00\t%f\n f_MP0\t%f\t f_0P0\t%f\t f_PP0\t%f\n f_MM0\t%f\t f_0M0\t%f\t f_PM0\t%f\n f_M0P\t%f\t f_00P\t%f\t f_P0P\t%f\n f_M0M\t%f\t f_00M\t%f\t f_P0M\t%f\n f_MPP\t%f\t f_0PP\t%f\t f_PPP\t%f\n f_MPM\t%f\t f_0PM\t%f\t f_PPM\t%f\n f_MMP\t%f\t f_0MP\t%f\t f_PMP\t%f\n f_MMM\t%f\t f_0MM\t%f\t f_PMM\t%f\n\n\n" , k_000, coordNodeX, coordNodeY, coordNodeZ, f_M00, f_000, f_P00,f_MP0, f_0P0, f_PP0, f_MM0, f_0M0, f_PM0, f_M0P, f_00P, f_P0P, f_M0M, f_00M, f_P0M, f_MPP, f_0PP, f_PPP, f_MPM, f_0PM, f_PPM, f_MMP, f_0MP, f_PMP, f_MMM, f_0MM, f_PMM); - - } - - } - - - - -void DistributionDebugInspector::inspect(std::shared_ptr<Parameter> para, uint level, uint t){ - + real coordNodeX = coordX[k_000]; + real coordNodeY = coordY[k_000]; + real coordNodeZ = coordZ[k_000]; + + if( coordNodeX>=minX && coordNodeX<=maxX && + coordNodeY>=minY && coordNodeY<=maxY && + coordNodeZ>=minZ && coordNodeZ<=maxZ ) + { + Distributions27 dist; + getPointersToDistributions(dist, distributions, numberOfFluidNodes, isEvenTimestep); + //////////////////////////////////////////////////////////////////////////////// + //! - Set neighbor indices (necessary for indirect addressing) + uint k_M00 = neighborX[k_000]; + uint k_0M0 = neighborY[k_000]; + uint k_00M = neighborZ[k_000]; + uint k_MM0 = neighborY[k_M00]; + uint k_M0M = neighborZ[k_M00]; + uint k_0MM = neighborZ[k_0M0]; + uint k_MMM = neighborZ[k_MM0]; + //////////////////////////////////////////////////////////////////////////////////// + //! - Set local distributions + //! 
+ real f_000 = (dist.f[DIR_000])[k_000]; + real f_P00 = (dist.f[DIR_P00])[k_000]; + real f_M00 = (dist.f[DIR_M00])[k_M00]; + real f_0P0 = (dist.f[DIR_0P0])[k_000]; + real f_0M0 = (dist.f[DIR_0M0])[k_0M0]; + real f_00P = (dist.f[DIR_00P])[k_000]; + real f_00M = (dist.f[DIR_00M])[k_00M]; + real f_PP0 = (dist.f[DIR_PP0])[k_000]; + real f_MM0 = (dist.f[DIR_MM0])[k_MM0]; + real f_PM0 = (dist.f[DIR_PM0])[k_0M0]; + real f_MP0 = (dist.f[DIR_MP0])[k_M00]; + real f_P0P = (dist.f[DIR_P0P])[k_000]; + real f_M0M = (dist.f[DIR_M0M])[k_M0M]; + real f_P0M = (dist.f[DIR_P0M])[k_00M]; + real f_M0P = (dist.f[DIR_M0P])[k_M00]; + real f_0PP = (dist.f[DIR_0PP])[k_000]; + real f_0MM = (dist.f[DIR_0MM])[k_0MM]; + real f_0PM = (dist.f[DIR_0PM])[k_00M]; + real f_0MP = (dist.f[DIR_0MP])[k_0M0]; + real f_PPP = (dist.f[DIR_PPP])[k_000]; + real f_MPP = (dist.f[DIR_MPP])[k_M00]; + real f_PMP = (dist.f[DIR_PMP])[k_0M0]; + real f_MMP = (dist.f[DIR_MMP])[k_MM0]; + real f_PPM = (dist.f[DIR_PPM])[k_00M]; + real f_MPM = (dist.f[DIR_MPM])[k_M0M]; + real f_PMM = (dist.f[DIR_PMM])[k_0MM]; + real f_MMM = (dist.f[DIR_MMM])[k_MMM]; + + real drho = ((((f_PPP + f_MMM) + (f_MPM + f_PMP)) + ((f_MPP + f_PMM) + (f_MMP + f_PPM))) + + (((f_0MP + f_0PM) + (f_0MM + f_0PP)) + ((f_M0P + f_P0M) + (f_M0M + f_P0P)) + + ((f_MP0 + f_PM0) + (f_MM0 + f_PP0))) + + ((f_M00 + f_P00) + (f_0M0 + f_0P0) + (f_00M + f_00P))) + + f_000; + + real oneOverRho = c1o1 / (c1o1 + drho); + + real vvx = ((((f_PPP - f_MMM) + (f_PMP - f_MPM)) + ((f_PMM - f_MPP) + (f_PPM - f_MMP))) + + (((f_P0M - f_M0P) + (f_P0P - f_M0M)) + ((f_PM0 - f_MP0) + (f_PP0 - f_MM0))) + (f_P00 - f_M00)) * + oneOverRho; + real vvy = ((((f_PPP - f_MMM) + (f_MPM - f_PMP)) + ((f_MPP - f_PMM) + (f_PPM - f_MMP))) + + (((f_0PM - f_0MP) + (f_0PP - f_0MM)) + ((f_MP0 - f_PM0) + (f_PP0 - f_MM0))) + (f_0P0 - f_0M0)) * + oneOverRho; + real vvz = ((((f_PPP - f_MMM) + (f_PMP - f_MPM)) + ((f_MPP - f_PMM) + (f_MMP - f_PPM))) + + (((f_0MP - f_0PM) + (f_0PP - f_0MM)) + ((f_M0P - f_P0M) + 
(f_P0P - f_M0M))) + (f_00P - f_00M)) * + oneOverRho; + + printf("Node %u \t (%f\t%f\t%f)\n rho: %f\t velo: %f\t %f \t %f\n\n" , k_000, coordNodeX, coordNodeY, coordNodeZ, drho, vvx, vvy, vvz); + printf("Node %u \t (%f\t%f\t%f)\n f_M00\t%f\t f_000\t%f\t f_P00\t%f\n f_MP0\t%f\t f_0P0\t%f\t f_PP0\t%f\n f_MM0\t%f\t f_0M0\t%f\t f_PM0\t%f\n f_M0P\t%f\t f_00P\t%f\t f_P0P\t%f\n f_M0M\t%f\t f_00M\t%f\t f_P0M\t%f\n f_MPP\t%f\t f_0PP\t%f\t f_PPP\t%f\n f_MPM\t%f\t f_0PM\t%f\t f_PPM\t%f\n f_MMP\t%f\t f_0MP\t%f\t f_PMP\t%f\n f_MMM\t%f\t f_0MM\t%f\t f_PMM\t%f\n\n\n" , k_000, coordNodeX, coordNodeY, coordNodeZ, f_M00, f_000, f_P00,f_MP0, f_0P0, f_PP0, f_MM0, f_0M0, f_PM0, f_M0P, f_00P, f_P0P, f_M0M, f_00M, f_P0M, f_MPP, f_0PP, f_PPP, f_MPM, f_0PM, f_PPM, f_MMP, f_0MP, f_PMP, f_MMM, f_0MM, f_PMM); + + } + +} + + + + +void DistributionDebugInspector::inspect(std::shared_ptr<Parameter> para, uint level, uint t) +{ if(this->inspectionLevel!=level) return; diff --git a/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/PrecursorWriter.cu b/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/PrecursorWriter.cu index c6d17be97ec9a3c178b9aeb6a3db44ebeb9cf0a8..1a8260ef936e2707fb38fbbba71cdbfac692f350 100644 --- a/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/PrecursorWriter.cu +++ b/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/PrecursorWriter.cu @@ -1,21 +1,52 @@ +//======================================================================================= +// ____ ____ __ ______ __________ __ __ __ __ +// \ \ | | | | | _ \ |___ ___| | | | | / \ | | +// \ \ | | | | | |_) | | | | | | | / \ | | +// \ \ | | | | | _ / | | | | | | / /\ \ | | +// \ \ | | | | | | \ \ | | | \__/ | / ____ \ | |____ +// \ \ | | |__| |__| \__\ |__| \________/ /__/ \__\ |_______| +// \ \ | | ________________________________________________________________ +// \ \ | | | ______________________________________________________________| +// \ \| | | | __ __ __ __ ______ _______ +// \ | | |_____ | | | | | | | | | _ \ / _____) 
+// \ | | _____| | | | | | | | | | | \ \ \_______ +// \ | | | | |_____ | \_/ | | | | |_/ / _____ | +// \ _____| |__| |________| \_______/ |__| |______/ (_______/ +// +// This file is part of VirtualFluids. VirtualFluids is free software: you can +// redistribute it and/or modify it under the terms of the GNU General Public +// License as published by the Free Software Foundation, either version 3 of +// the License, or (at your option) any later version. +// +// VirtualFluids is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// for more details. +// +// You should have received a copy of the GNU General Public License along +// with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>. +// +//! \file PrecursorWriter.cu +//! \ingroup PreCollisionInteractor +//! \author Henrik Asmuth, Henry Korb +//====================================================================================== #include "PrecursorWriter.h" #include "basics/writer/WbWriterVtkXmlImageBinary.h" #include <cuda.h> #include <cuda_runtime.h> #include <helper_cuda.h> -#include <cuda/CudaGrid.h> -#include "Kernel/Utilities/DistributionHelper.cuh" +#include "cuda/CudaGrid.h" +#include "LBM/GPUHelperFunctions/KernelUtilities.h" -#include <Core/StringUtilities/StringUtil.h> +#include "Core/StringUtilities/StringUtil.h" #include "Parameter/Parameter.h" #include "DataStructureInitializer/GridProvider.h" #include "GPU/CudaMemoryManager.h" using namespace vf::lbm::dir; - - +using namespace vf::gpu; //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// //TODO check everything for multiple level @@ -52,13 +83,16 @@ __global__ void fillArrayVelocities(const uint numberOfPrecursorNodes, { - const uint node = 
vf::gpu::getNodeIndex(); + //////////////////////////////////////////////////////////////////////////////// + //! - Get node index coordinates from threadIdx, blockIdx, blockDim and gridDim. + //! + const unsigned nodeIndex = vf::gpu::getNodeIndex(); - if(node>=numberOfPrecursorNodes) return; + if(nodeIndex>=numberOfPrecursorNodes) return; - precursorData[linearIdx(0u, node, numberOfPrecursorNodes)] = vx[indices[node]]*velocityRatio; - precursorData[linearIdx(1u, node, numberOfPrecursorNodes)] = vy[indices[node]]*velocityRatio; - precursorData[linearIdx(2u, node, numberOfPrecursorNodes)] = vz[indices[node]]*velocityRatio; + precursorData[linearIdx(0u, nodeIndex, numberOfPrecursorNodes)] = vx[indices[nodeIndex]]*velocityRatio; + precursorData[linearIdx(1u, nodeIndex, numberOfPrecursorNodes)] = vy[indices[nodeIndex]]*velocityRatio; + precursorData[linearIdx(2u, nodeIndex, numberOfPrecursorNodes)] = vz[indices[nodeIndex]]*velocityRatio; } @@ -71,15 +105,19 @@ __global__ void fillArrayDistributions( uint numberOfPrecursorNodes, bool isEvenTimestep, unsigned long numberOfLBnodes) { - const uint node = vf::gpu::getNodeIndex(); + //////////////////////////////////////////////////////////////////////////////// + //! - Get node index coordinates from threadIdx, blockIdx, blockDim and gridDim. + //! + const unsigned nodeIndex = vf::gpu::getNodeIndex(); - if(node>=numberOfPrecursorNodes) return; + if(nodeIndex>=numberOfPrecursorNodes) return; - Distributions27 dist = vf::gpu::getDistributionReferences27(distributions, numberOfLBnodes, isEvenTimestep); + Distributions27 dist; + getPointersToDistributions(dist, distributions, numberOfLBnodes, isEvenTimestep); //////////////////////////////////////////////////////////////////////////////// // ! 
- Set neighbor indices (necessary for indirect addressing) - uint k_000 = indices[node]; + uint k_000 = indices[nodeIndex]; // uint k_M00 = neighborX[k_000]; uint k_0M0 = neighborY[k_000]; uint k_00M = neighborZ[k_000]; @@ -91,15 +129,15 @@ __global__ void fillArrayDistributions( uint numberOfPrecursorNodes, //////////////////////////////////////////////////////////////////////////////////// //! - Get local distributions in PX directions //! - precursorData[linearIdx(PrecP00, node, numberOfPrecursorNodes)] = (dist.f[DIR_P00])[k_000]; - precursorData[linearIdx(PrecPP0, node, numberOfPrecursorNodes)] = (dist.f[DIR_PP0])[k_000]; - precursorData[linearIdx(PrecPM0, node, numberOfPrecursorNodes)] = (dist.f[DIR_PM0])[k_0M0]; - precursorData[linearIdx(PrecP0P, node, numberOfPrecursorNodes)] = (dist.f[DIR_P0P])[k_000]; - precursorData[linearIdx(PrecP0M, node, numberOfPrecursorNodes)] = (dist.f[DIR_P0M])[k_00M]; - precursorData[linearIdx(PrecPPP, node, numberOfPrecursorNodes)] = (dist.f[DIR_PPP])[k_000]; - precursorData[linearIdx(PrecPMP, node, numberOfPrecursorNodes)] = (dist.f[DIR_PMP])[k_0M0]; - precursorData[linearIdx(PrecPPM, node, numberOfPrecursorNodes)] = (dist.f[DIR_PPM])[k_00M]; - precursorData[linearIdx(PrecPMM, node, numberOfPrecursorNodes)] = (dist.f[DIR_PMM])[k_0MM]; + precursorData[linearIdx(PrecP00, nodeIndex, numberOfPrecursorNodes)] = (dist.f[DIR_P00])[k_000]; + precursorData[linearIdx(PrecPP0, nodeIndex, numberOfPrecursorNodes)] = (dist.f[DIR_PP0])[k_000]; + precursorData[linearIdx(PrecPM0, nodeIndex, numberOfPrecursorNodes)] = (dist.f[DIR_PM0])[k_0M0]; + precursorData[linearIdx(PrecP0P, nodeIndex, numberOfPrecursorNodes)] = (dist.f[DIR_P0P])[k_000]; + precursorData[linearIdx(PrecP0M, nodeIndex, numberOfPrecursorNodes)] = (dist.f[DIR_P0M])[k_00M]; + precursorData[linearIdx(PrecPPP, nodeIndex, numberOfPrecursorNodes)] = (dist.f[DIR_PPP])[k_000]; + precursorData[linearIdx(PrecPMP, nodeIndex, numberOfPrecursorNodes)] = (dist.f[DIR_PMP])[k_0M0]; + 
precursorData[linearIdx(PrecPPM, nodeIndex, numberOfPrecursorNodes)] = (dist.f[DIR_PPM])[k_00M]; + precursorData[linearIdx(PrecPMM, nodeIndex, numberOfPrecursorNodes)] = (dist.f[DIR_PMM])[k_0MM]; } diff --git a/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/PrecursorWriter.h b/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/PrecursorWriter.h index 3bae63a339255f3f72196e20096f6019cdd7748d..264023b58ba6db46b50f6a85b334c530864a0b8f 100644 --- a/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/PrecursorWriter.h +++ b/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/PrecursorWriter.h @@ -31,7 +31,7 @@ //! \date 05/12/2022 //! \brief Probe writing planes of data to be used as inflow data in successor simulation using PrecursorBC //! -//! The probe writes out yz-planes at a specifc x position ( \param xPos ) of either velocity or distributions +//! The probe writes out yz-planes at a specific x position ( \param xPos ) of either velocity or distributions //! that can be read by PrecursorBC as inflow data. //=======================================================================================