From da92c5b46be7818540075b0ff6d1a3fbc40581d7 Mon Sep 17 00:00:00 2001 From: Soeren Peters <peters@irmb.tu-bs.de> Date: Mon, 29 Mar 2021 14:51:15 +0000 Subject: [PATCH] Add CUDA LTO. --- CMakeLists.txt | 19 +- apps/gpu/LBM/DrivenCavity/CMakeLists.txt | 6 +- src/gpu/GridGenerator/CMakeLists.txt | 2 +- src/gpu/VirtualFluids_GPU/CMakeLists.txt | 6 + .../VirtualFluids_GPU/GPU/Cumulant27chim.cu | 1338 ----------------- .../GPU/CumulantK17ChimeraKernel.cu | 742 +++++++++ src/lbm/CMakeLists.txt | 4 +- src/lbm/CumulantChimeraPreCompiled.cu | 433 ++++++ src/lbm/CumulantChimeraPreCompiled.h | 59 + 9 files changed, 1263 insertions(+), 1346 deletions(-) create mode 100644 src/gpu/VirtualFluids_GPU/GPU/CumulantK17ChimeraKernel.cu create mode 100644 src/lbm/CumulantChimeraPreCompiled.cu create mode 100644 src/lbm/CumulantChimeraPreCompiled.h diff --git a/CMakeLists.txt b/CMakeLists.txt index 2c85fcf7e..e1d5b0736 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -94,12 +94,21 @@ if(BUILD_VF_GPU) set(CMAKE_CUDA_STANDARD_REQUIRED TRUE) - set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Xcudafe --display_error_number") - if(NOT DEFINED CMAKE_CUDA_ARCHITECTURES) - message(WARNING "CMAKE_CUDA_ARCHITECTURES was not defined and is set to 30 (CUDA support until 10.1 only).") - set(CMAKE_CUDA_ARCHITECTURES 30) - endif() + #set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -dlto") + enable_language(CUDA) + #set(CMAKE_CUDA_FLAGS "-gencode arch=compute_70,code=lto_70") + + set(CMAKE_CUDA_FLAGS "-dlto -arch=sm_75") + #set(CMAKE_CUDA_ARCHITECTURES 75) + #set(CMAKE_CUDA_FLAGS "-arch=sm_75") + + #set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Xcudafe --display_error_number") + + #if(NOT DEFINED CMAKE_CUDA_ARCHITECTURES) + # message(WARNING "CMAKE_CUDA_ARCHITECTURES was not defined and is set to 30 (CUDA support until 10.1 only).") + # set(CMAKE_CUDA_ARCHITECTURES 30) + #endif() message("CUDA Architecture: ${CMAKE_CUDA_ARCHITECTURES}") endif() diff --git a/apps/gpu/LBM/DrivenCavity/CMakeLists.txt b/apps/gpu/LBM/DrivenCavity/CMakeLists.txt index c72393791..10873fcd4 100644 --- a/apps/gpu/LBM/DrivenCavity/CMakeLists.txt +++ b/apps/gpu/LBM/DrivenCavity/CMakeLists.txt @@ -6,4 +6,8 @@ vf_add_library(BUILDTYPE binary PRIVATE_LINK basics VirtualFluids_GPU GridGenera set_source_files_properties(DrivenCavity.cpp PROPERTIES LANGUAGE CUDA) -set_target_properties(DrivenCavity PROPERTIES CUDA_SEPARABLE_COMPILATION ON) \ No newline at end of file +set_target_properties(DrivenCavity PROPERTIES CUDA_SEPARABLE_COMPILATION ON) + +#target_compile_options(DrivenCavity PRIVATE $<$<COMPILE_LANGUAGE:CUDA>:-dlto>) + +#target_link_options(DrivenCavity PRIVATE $<DEVICE_LINK:-dlto>) \ No newline at end of file diff --git a/src/gpu/GridGenerator/CMakeLists.txt b/src/gpu/GridGenerator/CMakeLists.txt index 29d77897a..84dd965d7 100644 --- a/src/gpu/GridGenerator/CMakeLists.txt +++ b/src/gpu/GridGenerator/CMakeLists.txt @@ -9,7 +9,7 @@ set_target_properties(${library_name} PROPERTIES CUDA_SEPARABLE_COMPILATION ON) # according to linker error when building static libraries. # https://stackoverflow.com/questions/50033435/cmake-cuda-separate-compilation-static-lib-link-error-on-windows-but-not-on-ubun if (NOT BUILD_SHARED_LIBRARY) - set_target_properties(${library_name} PROPERTIES CUDA_RESOLVE_DEVICE_SYMBOLS ON) + #set_target_properties(${library_name} PROPERTIES CUDA_RESOLVE_DEVICE_SYMBOLS ON) endif() # we want to suppress all cuda warnings so far for this library. diff --git a/src/gpu/VirtualFluids_GPU/CMakeLists.txt b/src/gpu/VirtualFluids_GPU/CMakeLists.txt index c2d897563..bda286187 100644 --- a/src/gpu/VirtualFluids_GPU/CMakeLists.txt +++ b/src/gpu/VirtualFluids_GPU/CMakeLists.txt @@ -13,3 +13,9 @@ linkBoost(COMPONENTS "serialization") #SET(TPN_WIN32 "/EHsc") #https://stackoverflow.com/questions/6832666/lnk2019-when-including-asio-headers-solution-generated-with-cmake #https://stackoverflow.com/questions/27442885/syntax-error-with-stdnumeric-limitsmax + +set_target_properties(VirtualFluids_GPU PROPERTIES CUDA_SEPARABLE_COMPILATION ON) + +#target_compile_options(VirtualFluids_GPU PRIVATE $<$<COMPILE_LANGUAGE:CUDA>:-dlto>) + +#target_link_options(VirtualFluids_GPU PRIVATE $<DEVICE_LINK:-dlto>) diff --git a/src/gpu/VirtualFluids_GPU/GPU/Cumulant27chim.cu b/src/gpu/VirtualFluids_GPU/GPU/Cumulant27chim.cu index 49e2c10a2..d34d545dc 100644 --- a/src/gpu/VirtualFluids_GPU/GPU/Cumulant27chim.cu +++ b/src/gpu/VirtualFluids_GPU/GPU/Cumulant27chim.cu @@ -37,1344 +37,6 @@ #include "math.h" #include <lbm/Chimera.h> -#include <lbm/CumulantChimera.h> - - - -__device__ Distributions27 getDistributions27(real* distributions, unsigned int size_Mat, bool isEvenTimestep) -{ - Distributions27 dist; - if (isEvenTimestep) - { - dist.f[dirE ] = &distributions[dirE *size_Mat]; - dist.f[dirW ] = &distributions[dirW *size_Mat]; - dist.f[dirN ] = &distributions[dirN *size_Mat]; - dist.f[dirS ] = &distributions[dirS *size_Mat]; - dist.f[dirT ] = &distributions[dirT *size_Mat]; - dist.f[dirB ] = &distributions[dirB *size_Mat]; - dist.f[dirNE ] = &distributions[dirNE *size_Mat]; - dist.f[dirSW ] = &distributions[dirSW *size_Mat]; - dist.f[dirSE ] = &distributions[dirSE *size_Mat]; - dist.f[dirNW ] = &distributions[dirNW *size_Mat]; - dist.f[dirTE ] = &distributions[dirTE *size_Mat]; - dist.f[dirBW ] = &distributions[dirBW *size_Mat]; - dist.f[dirBE ] = &distributions[dirBE *size_Mat]; - dist.f[dirTW ] = &distributions[dirTW *size_Mat]; - dist.f[dirTN ] = &distributions[dirTN *size_Mat]; - dist.f[dirBS ] = &distributions[dirBS *size_Mat]; - dist.f[dirBN ] = &distributions[dirBN *size_Mat]; - dist.f[dirTS ] = &distributions[dirTS *size_Mat]; - dist.f[dirREST] = &distributions[dirREST*size_Mat]; - dist.f[dirTNE ] = &distributions[dirTNE *size_Mat]; - dist.f[dirTSW ] = &distributions[dirTSW *size_Mat]; - dist.f[dirTSE ] = &distributions[dirTSE *size_Mat]; - dist.f[dirTNW ] = &distributions[dirTNW *size_Mat]; - dist.f[dirBNE ] = &distributions[dirBNE *size_Mat]; - dist.f[dirBSW ] = &distributions[dirBSW *size_Mat]; - dist.f[dirBSE ] = &distributions[dirBSE *size_Mat]; - dist.f[dirBNW ] = &distributions[dirBNW *size_Mat]; - } - else - { - dist.f[dirW ] = &distributions[dirE *size_Mat]; - dist.f[dirE ] = &distributions[dirW *size_Mat]; - dist.f[dirS ] = &distributions[dirN *size_Mat]; - dist.f[dirN ] = &distributions[dirS *size_Mat]; - dist.f[dirB ] = &distributions[dirT *size_Mat]; - dist.f[dirT ] = &distributions[dirB *size_Mat]; - dist.f[dirSW ] = &distributions[dirNE *size_Mat]; - dist.f[dirNE ] = &distributions[dirSW *size_Mat]; - dist.f[dirNW ] = &distributions[dirSE *size_Mat]; - dist.f[dirSE ] = &distributions[dirNW *size_Mat]; - dist.f[dirBW ] = &distributions[dirTE *size_Mat]; - dist.f[dirTE ] = &distributions[dirBW *size_Mat]; - dist.f[dirTW ] = &distributions[dirBE *size_Mat]; - dist.f[dirBE ] = &distributions[dirTW *size_Mat]; - dist.f[dirBS ] = &distributions[dirTN *size_Mat]; - dist.f[dirTN ] = &distributions[dirBS *size_Mat]; - dist.f[dirTS ] = &distributions[dirBN *size_Mat]; - dist.f[dirBN ] = &distributions[dirTS *size_Mat]; - dist.f[dirREST] = &distributions[dirREST*size_Mat]; - dist.f[dirBSW ] = &distributions[dirTNE *size_Mat]; - dist.f[dirBNE ] = &distributions[dirTSW *size_Mat]; - dist.f[dirBNW ] = &distributions[dirTSE *size_Mat]; - dist.f[dirBSE ] = &distributions[dirTNW *size_Mat]; - dist.f[dirTSW ] = &distributions[dirBNE *size_Mat]; - dist.f[dirTNE ] = &distributions[dirBSW *size_Mat]; - dist.f[dirTNW ] = &distributions[dirBSE *size_Mat]; - dist.f[dirTSE ] = &distributions[dirBNW *size_Mat]; - } - return dist; -} - -struct DistributionWrapper -{ - __device__ DistributionWrapper( - real* distributions, - unsigned int size_Mat, - bool isEvenTimestep, - uint k, - uint* neighborX, - uint* neighborY, - uint* neighborZ) : - dist(getDistributions27(distributions, size_Mat, isEvenTimestep)), - k(k), - kw (neighborX[k]), - ks (neighborY[k]), - kb (neighborZ[k]), - ksw (neighborY[kw]), - kbw (neighborZ[kw]), - kbs (neighborZ[ks]), - kbsw(neighborZ[ksw]) - { - read(); - } - - __device__ void read() - { - distribution.f[VF::LBM::DIR::PZZ] = (dist.f[dirE ])[k]; - distribution.f[VF::LBM::DIR::MZZ] = (dist.f[dirW ])[kw]; - distribution.f[VF::LBM::DIR::ZPZ] = (dist.f[dirN ])[k]; - distribution.f[VF::LBM::DIR::ZMZ] = (dist.f[dirS ])[ks]; - distribution.f[VF::LBM::DIR::ZZP] = (dist.f[dirT ])[k]; - distribution.f[VF::LBM::DIR::ZZM] = (dist.f[dirB ])[kb]; - distribution.f[VF::LBM::DIR::PPZ] = (dist.f[dirNE ])[k]; - distribution.f[VF::LBM::DIR::MMZ] = (dist.f[dirSW ])[ksw]; - distribution.f[VF::LBM::DIR::PMZ] = (dist.f[dirSE ])[ks]; - distribution.f[VF::LBM::DIR::MPZ] = (dist.f[dirNW ])[kw]; - distribution.f[VF::LBM::DIR::PZP] = (dist.f[dirTE ])[k]; - distribution.f[VF::LBM::DIR::MZM] = (dist.f[dirBW ])[kbw]; - distribution.f[VF::LBM::DIR::PZM] = (dist.f[dirBE ])[kb]; - distribution.f[VF::LBM::DIR::MZP] = (dist.f[dirTW ])[kw]; - distribution.f[VF::LBM::DIR::ZPP] = (dist.f[dirTN ])[k]; - distribution.f[VF::LBM::DIR::ZMM] = (dist.f[dirBS ])[kbs]; - distribution.f[VF::LBM::DIR::ZPM] = (dist.f[dirBN ])[kb]; - distribution.f[VF::LBM::DIR::ZMP] = (dist.f[dirTS ])[ks]; - distribution.f[VF::LBM::DIR::PPP] = (dist.f[dirTNE ])[k]; - distribution.f[VF::LBM::DIR::MPP] = (dist.f[dirTNW ])[kw]; - distribution.f[VF::LBM::DIR::PMP] = (dist.f[dirTSE ])[ks]; - distribution.f[VF::LBM::DIR::MMP] = (dist.f[dirTSW ])[ksw]; - distribution.f[VF::LBM::DIR::PPM] = (dist.f[dirBNE ])[kb]; - distribution.f[VF::LBM::DIR::MPM] = (dist.f[dirBNW ])[kbw]; - distribution.f[VF::LBM::DIR::PMM] = (dist.f[dirBSE ])[kbs]; - distribution.f[VF::LBM::DIR::MMM] = (dist.f[dirBSW ])[kbsw]; - distribution.f[VF::LBM::DIR::ZZZ] = (dist.f[dirREST])[k]; - } - - __device__ void write() - { - (dist.f[dirE ])[k] = distribution.f[VF::LBM::DIR::PZZ]; - (dist.f[dirW ])[kw] = distribution.f[VF::LBM::DIR::MZZ]; - (dist.f[dirN ])[k] = distribution.f[VF::LBM::DIR::ZPZ]; - (dist.f[dirS ])[ks] = distribution.f[VF::LBM::DIR::ZMZ]; - (dist.f[dirT ])[k] = distribution.f[VF::LBM::DIR::ZZP]; - (dist.f[dirB ])[kb] = distribution.f[VF::LBM::DIR::ZZM]; - (dist.f[dirNE ])[k] = distribution.f[VF::LBM::DIR::PPZ]; - (dist.f[dirSW ])[ksw] = distribution.f[VF::LBM::DIR::MMZ]; - (dist.f[dirSE ])[ks] = distribution.f[VF::LBM::DIR::PMZ]; - (dist.f[dirNW ])[kw] = distribution.f[VF::LBM::DIR::MPZ]; - (dist.f[dirTE ])[k] = distribution.f[VF::LBM::DIR::PZP]; - (dist.f[dirBW ])[kbw] = distribution.f[VF::LBM::DIR::MZM]; - (dist.f[dirBE ])[kb] = distribution.f[VF::LBM::DIR::PZM]; - (dist.f[dirTW ])[kw] = distribution.f[VF::LBM::DIR::MZP]; - (dist.f[dirTN ])[k] = distribution.f[VF::LBM::DIR::ZPP]; - (dist.f[dirBS ])[kbs] = distribution.f[VF::LBM::DIR::ZMM]; - (dist.f[dirBN ])[kb] = distribution.f[VF::LBM::DIR::ZPM]; - (dist.f[dirTS ])[ks] = distribution.f[VF::LBM::DIR::ZMP]; - (dist.f[dirTNE ])[k] = distribution.f[VF::LBM::DIR::PPP]; - (dist.f[dirTNW ])[kw] = distribution.f[VF::LBM::DIR::MPP]; - (dist.f[dirTSE ])[ks] = distribution.f[VF::LBM::DIR::PMP]; - (dist.f[dirTSW ])[ksw] = distribution.f[VF::LBM::DIR::MMP]; - (dist.f[dirBNE ])[kb] = distribution.f[VF::LBM::DIR::PPM]; - (dist.f[dirBNW ])[kbw] = distribution.f[VF::LBM::DIR::MPM]; - (dist.f[dirBSE ])[kbs] = distribution.f[VF::LBM::DIR::PMM]; - (dist.f[dirBSW ])[kbsw] = distribution.f[VF::LBM::DIR::MMM]; - (dist.f[dirREST])[k] = distribution.f[VF::LBM::DIR::ZZZ]; - } - - Distributions27 dist; - - VF::LBM::Distribution27 distribution; - - const uint k; - const uint kw; - const uint ks; - const uint kb; - const uint ksw; - const uint kbw; - const uint kbs; - const uint kbsw; -}; - -__device__ unsigned int getNodeIndex() -{ - const unsigned x = threadIdx.x; - const unsigned y = blockIdx.x; - const unsigned z = blockIdx.y; - - const unsigned nx = blockDim.x; - const unsigned ny = gridDim.x; - - return nx*(ny*z + y) + x; -} - -__device__ bool isValidFluidNode(uint k, int size_Mat, uint nodeType) -{ - return (k < size_Mat) && (nodeType == GEO_FLUID); -} - -__device__ void getLevelForce(real fx, real fy, real fz, int level, real* forces) -{ - real fx_t {1.}, fy_t {1.}, fz_t {1.}; - for (int i = 0; i < level; i++) - { - fx_t *= c2o1; - fy_t *= c2o1; - fz_t *= c2o1; - } - - forces[0] = fx / fx_t; - forces[1] = fy / fy_t; - forces[2] = fz / fz_t; -} - - -extern "C" __global__ void Cumulant_K17_LBM_Device_Kernel( - real omega, - uint* typeOfGridNode, - uint* neighborX, - uint* neighborY, - uint* neighborZ, - real* distributions, - int size_Mat, - int level, - real* forces, - bool isEvenTimestep) -{ - const uint k = getNodeIndex(); - const uint nodeType = typeOfGridNode[k]; - - if (isValidFluidNode(k, size_Mat, nodeType)) - { - DistributionWrapper distributionWrapper { - distributions, size_Mat, isEvenTimestep, k, neighborX, neighborY, neighborZ - }; - - real level_forces[3]; - getLevelForce(forces[0], forces[1], forces[2], level, level_forces); - - VF::LBM::cumulantChimera(distributionWrapper.distribution, omega, level_forces); - - distributionWrapper.write(); - } -} - - - -//////////////////////////////////////////////////////////////////////////////// -extern "C" __global__ void Cumulant_K17_LBM_Device_Kernel_old( - real omega, - uint* typeOfGridNode, - uint* neighborX, - uint* neighborY, - uint* neighborZ, - real* distributions, - int size_Mat, - int level, - real* forces, - bool isEvenTimestep) -{ - ////////////////////////////////////////////////////////////////////////// - //! Cumulant K17 Kernel is based on \ref - //! <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017), DOI:10.1016/j.jcp.2017.05.040 ]</b></a> - //! and \ref - //! <a href="https://doi.org/10.1016/j.jcp.2017.07.004"><b>[ M. Geier et al. (2017), DOI:10.1016/j.jcp.2017.07.004 ]</b></a> - //! - //! The cumulant kernel is executed in the following steps - //! - //////////////////////////////////////////////////////////////////////////////// - //! - Get node index coordinates from thredIdx, blockIdx, blockDim and gridDim. - //! - const unsigned x = threadIdx.x; - const unsigned y = blockIdx.x; - const unsigned z = blockIdx.y; - - const unsigned nx = blockDim.x; - const unsigned ny = gridDim.x; - - const unsigned k = nx*(ny*z + y) + x; - - ////////////////////////////////////////////////////////////////////////// - // run for all indices in size_Mat and fluid nodes - if ((k < size_Mat) && (typeOfGridNode[k] == GEO_FLUID)) - { - ////////////////////////////////////////////////////////////////////////// - //! - Read distributions: style of reading and writing the distributions from/to stored arrays dependent on timestep is based on the esoteric twist algorithm \ref - //! <a href="https://doi.org/10.3390/computation5020019"><b>[ M. Geier et al. (2017), DOI:10.3390/computation5020019 ]</b></a> - //! - Distributions27 dist = getDistributions27(distributions, size_Mat, isEvenTimestep); - - //////////////////////////////////////////////////////////////////////////////// - //! - Set neighbor indices (necessary for indirect addressing) - uint kw = neighborX[k]; - uint ks = neighborY[k]; - uint kb = neighborZ[k]; - uint ksw = neighborY[kw]; - uint kbw = neighborZ[kw]; - uint kbs = neighborZ[ks]; - uint kbsw = neighborZ[ksw]; - //////////////////////////////////////////////////////////////////////////////////// - //! - Set local distributions - //! - // real mfcbb = (dist.f[dirE ])[k]; - // real mfabb = (dist.f[dirW ])[kw]; - // real mfbcb = (dist.f[dirN ])[k]; - // real mfbab = (dist.f[dirS ])[ks]; - // real mfbbc = (dist.f[dirT ])[k]; - // real mfbba = (dist.f[dirB ])[kb]; - // real mfccb = (dist.f[dirNE ])[k]; - // real mfaab = (dist.f[dirSW ])[ksw]; - // real mfcab = (dist.f[dirSE ])[ks]; - // real mfacb = (dist.f[dirNW ])[kw]; - // real mfcbc = (dist.f[dirTE ])[k]; - // real mfaba = (dist.f[dirBW ])[kbw]; - // real mfcba = (dist.f[dirBE ])[kb]; - // real mfabc = (dist.f[dirTW ])[kw]; - // real mfbcc = (dist.f[dirTN ])[k]; - // real mfbaa = (dist.f[dirBS ])[kbs]; - // real mfbca = (dist.f[dirBN ])[kb]; - // real mfbac = (dist.f[dirTS ])[ks]; - // real mfbbb = (dist.f[dirREST])[k]; - // real mfccc = (dist.f[dirTNE ])[k]; - // real mfaac = (dist.f[dirTSW ])[ksw]; - // real mfcac = (dist.f[dirTSE ])[ks]; - // real mfacc = (dist.f[dirTNW ])[kw]; - // real mfcca = (dist.f[dirBNE ])[kb]; - // real mfaaa = (dist.f[dirBSW ])[kbsw]; - // real mfcaa = (dist.f[dirBSE ])[kbs]; - // real mfaca = (dist.f[dirBNW ])[kbw]; - - VF::LBM::Distribution27 distribution_in; - - distribution_in.f[VF::LBM::DIR::PZZ] = (dist.f[dirE ])[k]; - distribution_in.f[VF::LBM::DIR::MZZ] = (dist.f[dirW ])[kw]; - distribution_in.f[VF::LBM::DIR::ZPZ] = (dist.f[dirN ])[k]; - distribution_in.f[VF::LBM::DIR::ZMZ] = (dist.f[dirS ])[ks]; - distribution_in.f[VF::LBM::DIR::ZZP] = (dist.f[dirT ])[k]; - distribution_in.f[VF::LBM::DIR::ZZM] = (dist.f[dirB ])[kb]; - distribution_in.f[VF::LBM::DIR::PPZ] = (dist.f[dirNE ])[k]; - distribution_in.f[VF::LBM::DIR::MMZ] = (dist.f[dirSW ])[ksw]; - distribution_in.f[VF::LBM::DIR::PMZ] = (dist.f[dirSE ])[ks]; - distribution_in.f[VF::LBM::DIR::MPZ] = (dist.f[dirNW ])[kw]; - distribution_in.f[VF::LBM::DIR::PZP] = (dist.f[dirTE ])[k]; - distribution_in.f[VF::LBM::DIR::MZM] = (dist.f[dirBW ])[kbw]; - distribution_in.f[VF::LBM::DIR::PZM] = (dist.f[dirBE ])[kb]; - distribution_in.f[VF::LBM::DIR::MZP] = (dist.f[dirTW ])[kw]; - distribution_in.f[VF::LBM::DIR::ZPP] = (dist.f[dirTN ])[k]; - distribution_in.f[VF::LBM::DIR::ZMM] = (dist.f[dirBS ])[kbs]; - distribution_in.f[VF::LBM::DIR::ZPM] = (dist.f[dirBN ])[kb]; - distribution_in.f[VF::LBM::DIR::ZMP] = (dist.f[dirTS ])[ks]; - distribution_in.f[VF::LBM::DIR::PPP] = (dist.f[dirTNE ])[k]; - distribution_in.f[VF::LBM::DIR::MPP] = (dist.f[dirTNW ])[kw]; - distribution_in.f[VF::LBM::DIR::PMP] = (dist.f[dirTSE ])[ks]; - distribution_in.f[VF::LBM::DIR::MMP] = (dist.f[dirTSW ])[ksw]; - distribution_in.f[VF::LBM::DIR::PPM] = (dist.f[dirBNE ])[kb]; - distribution_in.f[VF::LBM::DIR::MPM] = (dist.f[dirBNW ])[kbw]; - distribution_in.f[VF::LBM::DIR::PMM] = (dist.f[dirBSE ])[kbs]; - distribution_in.f[VF::LBM::DIR::MMM] = (dist.f[dirBSW ])[kbsw]; - distribution_in.f[VF::LBM::DIR::ZZZ] = (dist.f[dirREST])[k]; - - real mfcbb = distribution_in.f[VF::LBM::DIR::PZZ]; - real mfabb = distribution_in.f[VF::LBM::DIR::MZZ]; - real mfbcb = distribution_in.f[VF::LBM::DIR::ZPZ]; - real mfbab = distribution_in.f[VF::LBM::DIR::ZMZ]; - real mfbbc = distribution_in.f[VF::LBM::DIR::ZZP]; - real mfbba = distribution_in.f[VF::LBM::DIR::ZZM]; - real mfccb = distribution_in.f[VF::LBM::DIR::PPZ]; - real mfaab = distribution_in.f[VF::LBM::DIR::MMZ]; - real mfcab = distribution_in.f[VF::LBM::DIR::PMZ]; - real mfacb = distribution_in.f[VF::LBM::DIR::MPZ]; - real mfcbc = distribution_in.f[VF::LBM::DIR::PZP]; - real mfaba = distribution_in.f[VF::LBM::DIR::MZM]; - real mfcba = distribution_in.f[VF::LBM::DIR::PZM]; - real mfabc = distribution_in.f[VF::LBM::DIR::MZP]; - real mfbcc = distribution_in.f[VF::LBM::DIR::ZPP]; - real mfbaa = distribution_in.f[VF::LBM::DIR::ZMM]; - real mfbca = distribution_in.f[VF::LBM::DIR::ZPM]; - real mfbac = distribution_in.f[VF::LBM::DIR::ZMP]; - real mfccc = distribution_in.f[VF::LBM::DIR::PPP]; - real mfacc = distribution_in.f[VF::LBM::DIR::MPP]; - real mfcac = distribution_in.f[VF::LBM::DIR::PMP]; - real mfaac = distribution_in.f[VF::LBM::DIR::MMP]; - real mfcca = distribution_in.f[VF::LBM::DIR::PPM]; - real mfaca = distribution_in.f[VF::LBM::DIR::MPM]; - real mfcaa = distribution_in.f[VF::LBM::DIR::PMM]; - real mfaaa = distribution_in.f[VF::LBM::DIR::MMM]; - real mfbbb = distribution_in.f[VF::LBM::DIR::ZZZ]; - - - //////////////////////////////////////////////////////////////////////////////////// - //! - Calculate density and velocity using pyramid summation for low round-off errors as in Eq. (J1)-(J3) \ref - //! <a href="https://doi.org/10.1016/j.camwa.2015.05.001"><b>[ M. Geier et al. (2015), DOI:10.1016/j.camwa 2015.05.001 ]</b></a> - //! - real drho = - ((((mfccc + mfaaa) + (mfaca + mfcac)) + ((mfacc + mfcaa) + (mfaac + mfcca))) + - (((mfbac + mfbca) + (mfbaa + mfbcc)) + ((mfabc + mfcba) + (mfaba + mfcbc)) + ((mfacb + mfcab) + (mfaab + mfccb))) + - ((mfabb + mfcbb) + (mfbab + mfbcb) + (mfbba + mfbbc))) + mfbbb; - real rho = c1o1 + drho; - real OOrho = c1o1 / rho; - real vvx = - ((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfcaa - mfacc) + (mfcca - mfaac))) + - (((mfcba - mfabc) + (mfcbc - mfaba)) + ((mfcab - mfacb) + (mfccb - mfaab))) + - (mfcbb - mfabb)) * OOrho; - real vvy = - ((((mfccc - mfaaa) + (mfaca - mfcac)) + ((mfacc - mfcaa) + (mfcca - mfaac))) + - (((mfbca - mfbac) + (mfbcc - mfbaa)) + ((mfacb - mfcab) + (mfccb - mfaab))) + - (mfbcb - mfbab)) * OOrho; - real vvz = - ((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfacc - mfcaa) + (mfaac - mfcca))) + - (((mfbac - mfbca) + (mfbcc - mfbaa)) + ((mfabc - mfcba) + (mfcbc - mfaba))) + - (mfbbc - mfbba)) * OOrho; - //////////////////////////////////////////////////////////////////////////////////// - //! - Add half of the acceleration (body force) to the velocity as in Eq. (42) \ref - //! <a href="https://doi.org/10.1016/j.camwa.2015.05.001"><b>[ M. Geier et al. (2015), DOI:10.1016/j.camwa 2015.05.001 ]</b></a> - //! - real fx = forces[0]; - real fy = forces[1]; - real fz = forces[2]; - - real fx_t {1.}, fy_t {1.}, fz_t {1.}; - for (int i = 0; i < level; i++) - { - fx_t *= c2o1; - fy_t *= c2o1; - fz_t *= c2o1; - } - - fx /= fx_t; - fy /= fy_t; - fz /= fz_t; - //real forces[3] {fx, fy, fz}; - - vvx += fx * c1o2; - vvy += fy * c1o2; - vvz += fz * c1o2; - //////////////////////////////////////////////////////////////////////////////////// - // calculate the square of velocities for this lattice node - real vx2 = vvx*vvx; - real vy2 = vvy*vvy; - real vz2 = vvz*vvz; - //////////////////////////////////////////////////////////////////////////////////// - //! - Set relaxation limiters for third order cumulants to default value \f$ \lambda=0.001 \f$ according to section 6 in \ref - //! <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017), DOI:10.1016/j.jcp.2017.05 040 ]</b></a> - //! - real wadjust; - real qudricLimitP = c1o100; - real qudricLimitM = c1o100; - real qudricLimitD = c1o100; - //////////////////////////////////////////////////////////////////////////////////// - //! - Chimera transform from well conditioned distributions to central moments as defined in Appendix J in \ref - //! <a href="https://doi.org/10.1016/j.camwa.2015.05.001"><b>[ M. Geier et al. (2015), DOI:10.1016/j.camwa 2015.05.001 ]</b></a> - //! see also Eq. (6)-(14) in \ref - //! <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017), DOI:10.1016/j.jcp.2017.05 040 ]</b></a> - //! - //////////////////////////////////////////////////////////////////////////////////// - // Z - Dir - VF::LBM::forwardInverseChimeraWithK(mfaaa, mfaab, mfaac, vvz, vz2, c36o1, c1o36); - VF::LBM::forwardInverseChimeraWithK(mfaba, mfabb, mfabc, vvz, vz2, c9o1, c1o9); - VF::LBM::forwardInverseChimeraWithK(mfaca, mfacb, mfacc, vvz, vz2, c36o1, c1o36); - VF::LBM::forwardInverseChimeraWithK(mfbaa, mfbab, mfbac, vvz, vz2, c9o1, c1o9); - VF::LBM::forwardInverseChimeraWithK(mfbba, mfbbb, mfbbc, vvz, vz2, c9o4, c4o9); - VF::LBM::forwardInverseChimeraWithK(mfbca, mfbcb, mfbcc, vvz, vz2, c9o1, c1o9); - VF::LBM::forwardInverseChimeraWithK(mfcaa, mfcab, mfcac, vvz, vz2, c36o1, c1o36); - VF::LBM::forwardInverseChimeraWithK(mfcba, mfcbb, mfcbc, vvz, vz2, c9o1, c1o9); - VF::LBM::forwardInverseChimeraWithK(mfcca, mfccb, mfccc, vvz, vz2, c36o1, c1o36); - //////////////////////////////////////////////////////////////////////////////////// - // Y - Dir - VF::LBM::forwardInverseChimeraWithK(mfaaa, mfaba, mfaca, vvy, vy2, c6o1, c1o6); - VF::LBM::forwardChimera( mfaab, mfabb, mfacb, vvy, vy2); - VF::LBM::forwardInverseChimeraWithK(mfaac, mfabc, mfacc, vvy, vy2, c18o1, c1o18); - VF::LBM::forwardInverseChimeraWithK(mfbaa, mfbba, mfbca, vvy, vy2, c3o2, c2o3); - VF::LBM::forwardChimera( mfbab, mfbbb, mfbcb, vvy, vy2); - VF::LBM::forwardInverseChimeraWithK(mfbac, mfbbc, mfbcc, vvy, vy2, c9o2, c2o9); - VF::LBM::forwardInverseChimeraWithK(mfcaa, mfcba, mfcca, vvy, vy2, c6o1, c1o6); - VF::LBM::forwardChimera( mfcab, mfcbb, mfccb, vvy, vy2); - VF::LBM::forwardInverseChimeraWithK(mfcac, mfcbc, mfccc, vvy, vy2, c18o1, c1o18); - //////////////////////////////////////////////////////////////////////////////////// - // X - Dir - VF::LBM::forwardInverseChimeraWithK(mfaaa, mfbaa, mfcaa, vvx, vx2, c1o1, c1o1); - VF::LBM::forwardChimera( mfaba, mfbba, mfcba, vvx, vx2); - VF::LBM::forwardInverseChimeraWithK(mfaca, mfbca, mfcca, vvx, vx2, c3o1, c1o3); - VF::LBM::forwardChimera( mfaab, mfbab, mfcab, vvx, vx2); - VF::LBM::forwardChimera( mfabb, mfbbb, mfcbb, vvx, vx2); - VF::LBM::forwardChimera( mfacb, mfbcb, mfccb, vvx, vx2); - VF::LBM::forwardInverseChimeraWithK(mfaac, mfbac, mfcac, vvx, vx2, c3o1, c1o3); - VF::LBM::forwardChimera( mfabc, mfbbc, mfcbc, vvx, vx2); - VF::LBM::forwardInverseChimeraWithK(mfacc, mfbcc, mfccc, vvx, vx2, c3o1, c1o9); - //////////////////////////////////////////////////////////////////////////////////// - //! - Setting relaxation rates for non-hydrodynamic cumulants (default values). Variable names and equations according to - //! <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017), DOI:10.1016/j.jcp.2017.05 040 ]</b></a> - //! => [NAME IN PAPER]=[NAME IN CODE]=[DEFAULT VALUE]. - //! - Trace of second order cumulants \f$ C_{200}+C_{020}+C_{002} \f$ used to adjust bulk viscosity:\f$\omega_2=OxxPyyPzz=1.0 \f$. - //! - Third order cumulants \f$ C_{120}+C_{102}, C_{210}+C_{012}, C_{201}+C_{021} \f$: \f$ \omega_3=OxyyPxzz \f$ set according to Eq. (111) with simplifications assuming \f$ \omega_2=1.0\f$. - //! - Third order cumulants \f$ C_{120}-C_{102}, C_{210}-C_{012}, C_{201}-C_{021} \f$: \f$ \omega_4 = OxyyMxzz \f$ set according to Eq. (112) with simplifications assuming \f$ \omega_2 = 1.0\f$. - //! - Third order cumulants \f$ C_{111} \f$: \f$ \omega_5 = Oxyz \f$ set according to Eq. (113) with simplifications assuming \f$ \omega_2 = 1.0\f$ (modify for different bulk viscosity). - //! - Fourth order cumulants \f$ C_{220}, C_{202}, C_{022}, C_{211}, C_{121}, C_{112} \f$: for simplification all set to the same default value \f$ \omega_6=\omega_7=\omega_8=O4=1.0 \f$. - //! - Fifth order cumulants \f$ C_{221}, C_{212}, C_{122}\f$: \f$\omega_9=O5=1.0\f$. - //! - Sixth order cumulant \f$ C_{222}\f$: \f$\omega_{10}=O6=1.0\f$. - //! - //////////////////////////////////////////////////////////// - //2. - real OxxPyyPzz = c1o1; - //////////////////////////////////////////////////////////// - //3. - real OxyyPxzz = c8o1 * (-c2o1 + omega) * ( c1o1 + c2o1*omega) / (-c8o1 - c14o1*omega + c7o1*omega*omega); - real OxyyMxzz = c8o1 * (-c2o1 + omega) * (-c7o1 + c4o1*omega) / (c56o1 - c50o1*omega + c9o1*omega*omega); - real Oxyz = c24o1 * (-c2o1 + omega) * (-c2o1 - c7o1*omega + c3o1*omega*omega) / (c48o1 + c152o1*omega - c130o1*omega*omega + c29o1*omega*omega*omega); - //////////////////////////////////////////////////////////// - //4. - real O4 = c1o1; - //////////////////////////////////////////////////////////// - //5. - real O5 = c1o1; - //////////////////////////////////////////////////////////// - //6. - real O6 = c1o1; - //////////////////////////////////////////////////////////////////////////////////// - //! - A and B: parameters for fourth order convergence of the diffusion term according to Eq. (114) and (115) - //! <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017), DOI:10.1016/j.jcp.2017.05 040 ]</b></a> - //! with simplifications assuming \f$ \omega_2 = 1.0 \f$ (modify for different bulk viscosity). - //! - real A = (c4o1 + c2o1*omega - c3o1*omega*omega) / (c2o1 - c7o1*omega + c5o1*omega*omega); - real B = (c4o1 + c28o1*omega - c14o1*omega*omega) / (c6o1 - c21o1*omega + c15o1*omega*omega); - //////////////////////////////////////////////////////////////////////////////////// - //! - Compute cumulants from central moments according to Eq. (20)-(23) in - //! <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017), DOI:10.1016/j.jcp.2017.05 040 ]</b></a> - //! - //////////////////////////////////////////////////////////// - //4. - real CUMcbb = mfcbb - ((mfcaa + c1o3) * mfabb + c2o1 * mfbba * mfbab) * OOrho; - real CUMbcb = mfbcb - ((mfaca + c1o3) * mfbab + c2o1 * mfbba * mfabb) * OOrho; - real CUMbbc = mfbbc - ((mfaac + c1o3) * mfbba + c2o1 * mfbab * mfabb) * OOrho; - real CUMcca = mfcca - (((mfcaa * mfaca + c2o1 * mfbba * mfbba) + c1o3 * (mfcaa + mfaca)) * OOrho - c1o9*(drho * OOrho)); - real CUMcac = mfcac - (((mfcaa * mfaac + c2o1 * mfbab * mfbab) + c1o3 * (mfcaa + mfaac)) * OOrho - c1o9*(drho * OOrho)); - real CUMacc = mfacc - (((mfaac * mfaca + c2o1 * mfabb * mfabb) + c1o3 * (mfaac + mfaca)) * OOrho - c1o9*(drho * OOrho)); - //////////////////////////////////////////////////////////// - //5. - real CUMbcc = mfbcc - ((mfaac * mfbca + mfaca * mfbac + c4o1 * mfabb * mfbbb + c2o1 * (mfbab * mfacb + mfbba * mfabc)) + c1o3 * (mfbca + mfbac)) * OOrho; - real CUMcbc = mfcbc - ((mfaac * mfcba + mfcaa * mfabc + c4o1 * mfbab * mfbbb + c2o1 * (mfabb * mfcab + mfbba * mfbac)) + c1o3 * (mfcba + mfabc)) * OOrho; - real CUMccb = mfccb - ((mfcaa * mfacb + mfaca * mfcab + c4o1 * mfbba * mfbbb + c2o1 * (mfbab * mfbca + mfabb * mfcba)) + c1o3 * (mfacb + mfcab)) * OOrho; - //////////////////////////////////////////////////////////// - //6. - real CUMccc = mfccc + ((-c4o1 * mfbbb * mfbbb - - (mfcaa * mfacc + mfaca * mfcac + mfaac * mfcca) - - c4o1 * (mfabb * mfcbb + mfbab * mfbcb + mfbba * mfbbc) - - c2o1 * (mfbca * mfbac + mfcba * mfabc + mfcab * mfacb)) * OOrho - + (c4o1 * (mfbab * mfbab * mfaca + mfabb * mfabb * mfcaa + mfbba * mfbba * mfaac) - + c2o1 * (mfcaa * mfaca * mfaac) - + c16o1 * mfbba * mfbab * mfabb) * OOrho * OOrho - - c1o3 * (mfacc + mfcac + mfcca) * OOrho - - c1o9 * (mfcaa + mfaca + mfaac) * OOrho - + (c2o1 * (mfbab * mfbab + mfabb * mfabb + mfbba * mfbba) - + (mfaac * mfaca + mfaac * mfcaa + mfaca * mfcaa) + c1o3 *(mfaac + mfaca + mfcaa)) * OOrho * OOrho * c2o3 - + c1o27*((drho * drho - drho) * OOrho * OOrho)); - //////////////////////////////////////////////////////////////////////////////////// - //! - Compute linear combinations of second and third order cumulants - //! - //////////////////////////////////////////////////////////// - //2. - real mxxPyyPzz = mfcaa + mfaca + mfaac; - real mxxMyy = mfcaa - mfaca; - real mxxMzz = mfcaa - mfaac; - //////////////////////////////////////////////////////////// - //3. - real mxxyPyzz = mfcba + mfabc; - real mxxyMyzz = mfcba - mfabc; - real mxxzPyyz = mfcab + mfacb; - real mxxzMyyz = mfcab - mfacb; - real mxyyPxzz = mfbca + mfbac; - real mxyyMxzz = mfbca - mfbac; - //////////////////////////////////////////////////////////////////////////////////// - //incl. correction - //////////////////////////////////////////////////////////// - //! - Compute velocity gradients from second order cumulants according to Eq. (27)-(32) - //! <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017), DOI:10.1016/j.jcp.2017.05 040 ]</b></a> - //! Further explanations of the correction in viscosity in Appendix H of - //! <a href="https://doi.org/10.1016/j.camwa.2015.05.001"><b>[ M. Geier et al. (2015), DOI:10.1016/j.camwa 2015.05.001 ]</b></a> - //! Note that the division by rho is omitted here as we need rho times the gradients later. - //! - real Dxy = -c3o1*omega*mfbba; - real Dxz = -c3o1*omega*mfbab; - real Dyz = -c3o1*omega*mfabb; - real dxux = c1o2 * (-omega) *(mxxMyy + mxxMzz) + c1o2 * OxxPyyPzz * (mfaaa - mxxPyyPzz); - real dyuy = dxux + omega * c3o2 * mxxMyy; - real dzuz = dxux + omega * c3o2 * mxxMzz; - //////////////////////////////////////////////////////////// - //! - Relaxation of second order cumulants with correction terms according to Eq. (33)-(35) in - //! <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017), DOI:10.1016/j.jcp.2017.05 040 ]</b></a> - //! - mxxPyyPzz += OxxPyyPzz*(mfaaa - mxxPyyPzz) - c3o1 * (c1o1 - c1o2 * OxxPyyPzz) * (vx2 * dxux + vy2 * dyuy + vz2 * dzuz); - mxxMyy += omega * (-mxxMyy) - c3o1 * (c1o1 + c1o2 * (-omega)) * (vx2 * dxux - vy2 * dyuy); - mxxMzz += omega * (-mxxMzz) - c3o1 * (c1o1 + c1o2 * (-omega)) * (vx2 * dxux - vz2 * dzuz); - //////////////////////////////////////////////////////////////////////////////////// - ////no correction - //mxxPyyPzz += OxxPyyPzz*(mfaaa - mxxPyyPzz); - //mxxMyy += -(-omega) * (-mxxMyy); - //mxxMzz += -(-omega) * (-mxxMzz); - ////////////////////////////////////////////////////////////////////////// - mfabb += omega * (-mfabb); - mfbab += omega * (-mfbab); - mfbba += omega * (-mfbba); - //////////////////////////////////////////////////////////////////////////////////// - //relax - ////////////////////////////////////////////////////////////////////////// - // incl. limiter - //! - Relaxation of third order cumulants including limiter according to Eq. (116)-(123) - //! <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017), DOI:10.1016/j.jcp.2017.05 040 ]</b></a> - //! - wadjust = Oxyz + (c1o1 - Oxyz)*abs(mfbbb) / (abs(mfbbb) + qudricLimitD); - mfbbb += wadjust * (-mfbbb); - wadjust = OxyyPxzz + (c1o1 - OxyyPxzz)*abs(mxxyPyzz) / (abs(mxxyPyzz) + qudricLimitP); - mxxyPyzz += wadjust * (-mxxyPyzz); - wadjust = OxyyMxzz + (c1o1 - OxyyMxzz)*abs(mxxyMyzz) / (abs(mxxyMyzz) + qudricLimitM); - mxxyMyzz += wadjust * (-mxxyMyzz); - wadjust = OxyyPxzz + (c1o1 - OxyyPxzz)*abs(mxxzPyyz) / (abs(mxxzPyyz) + qudricLimitP); - mxxzPyyz += wadjust * (-mxxzPyyz); - wadjust = OxyyMxzz + (c1o1 - OxyyMxzz)*abs(mxxzMyyz) / (abs(mxxzMyyz) + qudricLimitM); - mxxzMyyz += wadjust * (-mxxzMyyz); - wadjust = OxyyPxzz + (c1o1 - OxyyPxzz)*abs(mxyyPxzz) / (abs(mxyyPxzz) + qudricLimitP); - mxyyPxzz += wadjust * (-mxyyPxzz); - wadjust = OxyyMxzz + (c1o1 - OxyyMxzz)*abs(mxyyMxzz) / (abs(mxyyMxzz) + qudricLimitM); - mxyyMxzz += wadjust * (-mxyyMxzz); - ////////////////////////////////////////////////////////////////////////// - // no limiter - //mfbbb += OxyyMxzz * (-mfbbb); - //mxxyPyzz += OxyyPxzz * (-mxxyPyzz); - //mxxyMyzz += OxyyMxzz * (-mxxyMyzz); - //mxxzPyyz += OxyyPxzz * (-mxxzPyyz); - //mxxzMyyz += OxyyMxzz * (-mxxzMyyz); - //mxyyPxzz += OxyyPxzz * (-mxyyPxzz); - //mxyyMxzz += OxyyMxzz * (-mxyyMxzz); - //////////////////////////////////////////////////////////////////////////////////// - //! - Compute inverse linear combinations of second and third order cumulants - //! - mfcaa = c1o3 * (mxxMyy + mxxMzz + mxxPyyPzz); - mfaca = c1o3 * (-c2o1* mxxMyy + mxxMzz + mxxPyyPzz); - mfaac = c1o3 * (mxxMyy - c2o1* mxxMzz + mxxPyyPzz); - mfcba = ( mxxyMyzz + mxxyPyzz) * c1o2; - mfabc = (-mxxyMyzz + mxxyPyzz) * c1o2; - mfcab = ( mxxzMyyz + mxxzPyyz) * c1o2; - mfacb = (-mxxzMyyz + mxxzPyyz) * c1o2; - mfbca = ( mxyyMxzz + mxyyPxzz) * c1o2; - mfbac = (-mxyyMxzz + mxyyPxzz) * c1o2; - ////////////////////////////////////////////////////////////////////////// - ////////////////////////////////////////////////////////////////////////// - //4. - // no limiter - //! - Relax fourth order cumulants to modified equilibrium for fourth order convergence of diffusion according to Eq. (43)-(48) - //! <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017), DOI:10.1016/j.jcp.2017.05 040 ]</b></a> - //! - CUMacc = -O4*(c1o1 / omega - c1o2) * (dyuy + dzuz) * c2o3 * A + (c1o1 - O4) * (CUMacc); - CUMcac = -O4*(c1o1 / omega - c1o2) * (dxux + dzuz) * c2o3 * A + (c1o1 - O4) * (CUMcac); - CUMcca = -O4*(c1o1 / omega - c1o2) * (dyuy + dxux) * c2o3 * A + (c1o1 - O4) * (CUMcca); - CUMbbc = -O4*(c1o1 / omega - c1o2) * Dxy * c1o3 * B + (c1o1 - O4) * (CUMbbc); - CUMbcb = -O4*(c1o1 / omega - c1o2) * Dxz * c1o3 * B + (c1o1 - O4) * (CUMbcb); - CUMcbb = -O4*(c1o1 / omega - c1o2) * Dyz * c1o3 * B + (c1o1 - O4) * (CUMcbb); - ////////////////////////////////////////////////////////////////////////// - //5. - CUMbcc += O5 * (-CUMbcc); - CUMcbc += O5 * (-CUMcbc); - CUMccb += O5 * (-CUMccb); - ////////////////////////////////////////////////////////////////////////// - //6. - CUMccc += O6 * (-CUMccc); - //////////////////////////////////////////////////////////////////////////////////// - //! - Compute central moments from post collision cumulants according to Eq. (53)-(56) in - //! <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017), DOI:10.1016/j.jcp.2017.05 040 ]</b></a> - //! - ////////////////////////////////////////////////////////////////////////// - //4. - mfcbb = CUMcbb + c1o3*((c3o1*mfcaa + c1o1) * mfabb + c6o1 * mfbba * mfbab) * OOrho; - mfbcb = CUMbcb + c1o3*((c3o1*mfaca + c1o1) * mfbab + c6o1 * mfbba * mfabb) * OOrho; - mfbbc = CUMbbc + c1o3*((c3o1*mfaac + c1o1) * mfbba + c6o1 * mfbab * mfabb) * OOrho; - mfcca = CUMcca + (((mfcaa * mfaca + c2o1 * mfbba * mfbba)*c9o1 + c3o1 * (mfcaa + mfaca)) * OOrho - (drho * OOrho))*c1o9; - mfcac = CUMcac + (((mfcaa * mfaac + c2o1 * mfbab * mfbab)*c9o1 + c3o1 * (mfcaa + mfaac)) * OOrho - (drho * OOrho))*c1o9; - mfacc = CUMacc + (((mfaac * mfaca + c2o1 * mfabb * mfabb)*c9o1 + c3o1 * (mfaac + mfaca)) * OOrho - (drho * OOrho))*c1o9; - ////////////////////////////////////////////////////////////////////////// - //5. - mfbcc = CUMbcc + c1o3 *(c3o1*(mfaac * mfbca + mfaca * mfbac + c4o1 * mfabb * mfbbb + c2o1 * (mfbab * mfacb + mfbba * mfabc)) + (mfbca + mfbac)) * OOrho; - mfcbc = CUMcbc + c1o3 *(c3o1*(mfaac * mfcba + mfcaa * mfabc + c4o1 * mfbab * mfbbb + c2o1 * (mfabb * mfcab + mfbba * mfbac)) + (mfcba + mfabc)) * OOrho; - mfccb = CUMccb + c1o3 *(c3o1*(mfcaa * mfacb + mfaca * mfcab + c4o1 * mfbba * mfbbb + c2o1 * (mfbab * mfbca + mfabb * mfcba)) + (mfacb + mfcab)) * OOrho; - ////////////////////////////////////////////////////////////////////////// - //6. - mfccc = CUMccc - ((-c4o1 * mfbbb * mfbbb - - (mfcaa * mfacc + mfaca * mfcac + mfaac * mfcca) - - c4o1 * (mfabb * mfcbb + mfbab * mfbcb + mfbba * mfbbc) - - c2o1 * (mfbca * mfbac + mfcba * mfabc + mfcab * mfacb)) * OOrho - + (c4o1 * (mfbab * mfbab * mfaca + mfabb * mfabb * mfcaa + mfbba * mfbba * mfaac) - + c2o1 * (mfcaa * mfaca * mfaac) - + c16o1 * mfbba * mfbab * mfabb) * OOrho * OOrho - - c1o3 * (mfacc + mfcac + mfcca) * OOrho - - c1o9 * (mfcaa + mfaca + mfaac) * OOrho - + (c2o1 * (mfbab * mfbab + mfabb * mfabb + mfbba * mfbba) - + (mfaac * mfaca + mfaac * mfcaa + mfaca * mfcaa) + c1o3 *(mfaac + mfaca + mfcaa)) * OOrho * OOrho * c2o3 - + c1o27*((drho * drho - drho) * OOrho * OOrho)); - //////////////////////////////////////////////////////////////////////////////////// - //! - Add acceleration (body force) to first order cumulants according to Eq. (85)-(87) in - //! <a href="https://doi.org/10.1016/j.camwa.2015.05.001"><b>[ M. Geier et al. (2015), DOI:10.1016/j.camwa 2015.05.001 ]</b></a> - //! - mfbaa = -mfbaa; - mfaba = -mfaba; - mfaab = -mfaab; - //////////////////////////////////////////////////////////////////////////////////// - //! - Chimera transform from central moments to well conditioned distributions as defined in Appendix J in - //! <a href="https://doi.org/10.1016/j.camwa.2015.05.001"><b>[ M. Geier et al. (2015), DOI:10.1016/j.camwa 2015.05.001 ]</b></a> - //! see also Eq. (88)-(96) in - //! <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017), DOI:10.1016/j.jcp.2017.05 040 ]</b></a> - //! - //////////////////////////////////////////////////////////////////////////////////// - // X - Dir - VF::LBM::backwardInverseChimeraWithK(mfaaa, mfbaa, mfcaa, vvx, vx2, c1o1, c1o1); - VF::LBM::backwardChimera( mfaba, mfbba, mfcba, vvx, vx2); - VF::LBM::backwardInverseChimeraWithK(mfaca, mfbca, mfcca, vvx, vx2, c3o1, c1o3); - VF::LBM::backwardChimera( mfaab, mfbab, mfcab, vvx, vx2); - VF::LBM::backwardChimera( mfabb, mfbbb, mfcbb, vvx, vx2); - VF::LBM::backwardChimera( mfacb, mfbcb, mfccb, vvx, vx2); - VF::LBM::backwardInverseChimeraWithK(mfaac, mfbac, mfcac, vvx, vx2, c3o1, c1o3); - VF::LBM::backwardChimera( mfabc, mfbbc, mfcbc, vvx, vx2); - VF::LBM::backwardInverseChimeraWithK(mfacc, mfbcc, mfccc, vvx, vx2, c9o1, c1o9); - //////////////////////////////////////////////////////////////////////////////////// - // Y - Dir - VF::LBM::backwardInverseChimeraWithK(mfaaa, mfaba, mfaca, vvy, vy2, c6o1, c1o6); - VF::LBM::backwardChimera( mfaab, mfabb, mfacb, vvy, vy2); - VF::LBM::backwardInverseChimeraWithK(mfaac, mfabc, mfacc, vvy, vy2, c18o1, c1o18); - VF::LBM::backwardInverseChimeraWithK(mfbaa, mfbba, mfbca, vvy, vy2, c3o2, c2o3); - VF::LBM::backwardChimera( mfbab, mfbbb, mfbcb, vvy, vy2); - VF::LBM::backwardInverseChimeraWithK(mfbac, mfbbc, mfbcc, vvy, vy2, c9o2, c2o9); - VF::LBM::backwardInverseChimeraWithK(mfcaa, mfcba, mfcca, vvy, vy2, c6o1, c1o6); - VF::LBM::backwardChimera( mfcab, mfcbb, mfccb, vvy, vy2); - VF::LBM::backwardInverseChimeraWithK(mfcac, mfcbc, mfccc, vvy, vy2, c18o1, c1o18); - //////////////////////////////////////////////////////////////////////////////////// - // Z - Dir - VF::LBM::backwardInverseChimeraWithK(mfaaa, mfaab, mfaac, vvz, vz2, c36o1, c1o36); - VF::LBM::backwardInverseChimeraWithK(mfaba, mfabb, mfabc, vvz, vz2, c9o1, c1o9); - VF::LBM::backwardInverseChimeraWithK(mfaca, mfacb, mfacc, vvz, vz2, c36o1, c1o36); - VF::LBM::backwardInverseChimeraWithK(mfbaa, mfbab, mfbac, vvz, vz2, c9o1, c1o9); - VF::LBM::backwardInverseChimeraWithK(mfbba, mfbbb, mfbbc, vvz, vz2, c9o4, c4o9); - VF::LBM::backwardInverseChimeraWithK(mfbca, mfbcb, mfbcc, vvz, vz2, c9o1, c1o9); - VF::LBM::backwardInverseChimeraWithK(mfcaa, mfcab, mfcac, vvz, vz2, c36o1, c1o36); - VF::LBM::backwardInverseChimeraWithK(mfcba, mfcbb, mfcbc, vvz, vz2, c9o1, c1o9); - VF::LBM::backwardInverseChimeraWithK(mfcca, mfccb, mfccc, vvz, vz2, c36o1, c1o36); - - //////////////////////////////////////////////////////////////////////////////////// - //! - Write distributions: style of reading and writing the distributions from/to - //! stored arrays dependent on timestep is based on the esoteric twist algorithm - //! <a href="https://doi.org/10.3390/computation5020019"><b>[ M. Geier et al. (2017), DOI:10.3390/computation5020019 ]</b></a> - //! - // (dist.f[dirE ])[k ] = mfabb; - // (dist.f[dirW ])[kw ] = mfcbb; - // (dist.f[dirN ])[k ] = mfbab; - // (dist.f[dirS ])[ks ] = mfbcb; - // (dist.f[dirT ])[k ] = mfbba; - // (dist.f[dirB ])[kb ] = mfbbc; - // (dist.f[dirNE ])[k ] = mfaab; - // (dist.f[dirSW ])[ksw ] = mfccb; - // (dist.f[dirSE ])[ks ] = mfacb; - // (dist.f[dirNW ])[kw ] = mfcab; - // (dist.f[dirTE ])[k ] = mfaba; - // (dist.f[dirBW ])[kbw ] = mfcbc; - // (dist.f[dirBE ])[kb ] = mfabc; - // (dist.f[dirTW ])[kw ] = mfcba; - // (dist.f[dirTN ])[k ] = mfbaa; - // (dist.f[dirBS ])[kbs ] = mfbcc; - // (dist.f[dirBN ])[kb ] = mfbac; - // (dist.f[dirTS ])[ks ] = mfbca; - // (dist.f[dirREST])[k ] = mfbbb; - // (dist.f[dirTNE ])[k ] = mfaaa; - // (dist.f[dirTSE ])[ks ] = mfaca; - // (dist.f[dirBNE ])[kb ] = mfaac; - // (dist.f[dirBSE ])[kbs ] = mfacc; - // (dist.f[dirTNW ])[kw ] = mfcaa; - // (dist.f[dirTSW ])[ksw ] = mfcca; - // (dist.f[dirBNW ])[kbw ] = mfcac; - // (dist.f[dirBSW ])[kbsw] = mfccc; - - VF::LBM::Distribution27 distribution_out; - - distribution_out.f[VF::LBM::DIR::MZZ] = mfcbb; - distribution_out.f[VF::LBM::DIR::PZZ] = mfabb; - distribution_out.f[VF::LBM::DIR::ZMZ] = mfbcb; - distribution_out.f[VF::LBM::DIR::ZPZ] = mfbab; - distribution_out.f[VF::LBM::DIR::ZZM] = mfbbc; - distribution_out.f[VF::LBM::DIR::ZZP] = mfbba; - distribution_out.f[VF::LBM::DIR::MMZ] = mfccb; - distribution_out.f[VF::LBM::DIR::PPZ] = mfaab; - distribution_out.f[VF::LBM::DIR::MPZ] = mfcab; - distribution_out.f[VF::LBM::DIR::PMZ] = mfacb; - distribution_out.f[VF::LBM::DIR::MZM] = mfcbc; - distribution_out.f[VF::LBM::DIR::PZP] = mfaba; - distribution_out.f[VF::LBM::DIR::MZP] = mfcba; - distribution_out.f[VF::LBM::DIR::PZM] = mfabc; - distribution_out.f[VF::LBM::DIR::ZMM] = mfbcc; - distribution_out.f[VF::LBM::DIR::ZPP] = mfbaa; - distribution_out.f[VF::LBM::DIR::ZMP] = mfbca; - distribution_out.f[VF::LBM::DIR::ZPM] = mfbac; - distribution_out.f[VF::LBM::DIR::MMM] = mfccc; - distribution_out.f[VF::LBM::DIR::PMM] = mfacc; - distribution_out.f[VF::LBM::DIR::MPM] = mfcac; - distribution_out.f[VF::LBM::DIR::PPM] = mfaac; - distribution_out.f[VF::LBM::DIR::MMP] = mfcca; - distribution_out.f[VF::LBM::DIR::PMP] = mfaca; - distribution_out.f[VF::LBM::DIR::MPP] = mfcaa; - distribution_out.f[VF::LBM::DIR::PPP] = mfaaa; - distribution_out.f[VF::LBM::DIR::ZZZ] = mfbbb; - - - (dist.f[dirE ])[k] = distribution_out.f[VF::LBM::DIR::PZZ]; - (dist.f[dirW ])[kw] = distribution_out.f[VF::LBM::DIR::MZZ]; - (dist.f[dirN ])[k] = distribution_out.f[VF::LBM::DIR::ZPZ]; - (dist.f[dirS ])[ks] = distribution_out.f[VF::LBM::DIR::ZMZ]; - (dist.f[dirT ])[k] = distribution_out.f[VF::LBM::DIR::ZZP]; - (dist.f[dirB ])[kb] = distribution_out.f[VF::LBM::DIR::ZZM]; - (dist.f[dirNE ])[k] = distribution_out.f[VF::LBM::DIR::PPZ]; - (dist.f[dirSW ])[ksw] = distribution_out.f[VF::LBM::DIR::MMZ]; - (dist.f[dirSE ])[ks] = distribution_out.f[VF::LBM::DIR::PMZ]; - (dist.f[dirNW ])[kw] = distribution_out.f[VF::LBM::DIR::MPZ]; - (dist.f[dirTE ])[k] = distribution_out.f[VF::LBM::DIR::PZP]; - (dist.f[dirBW ])[kbw] = distribution_out.f[VF::LBM::DIR::MZM]; - (dist.f[dirBE ])[kb] = distribution_out.f[VF::LBM::DIR::PZM]; - (dist.f[dirTW ])[kw] = distribution_out.f[VF::LBM::DIR::MZP]; - (dist.f[dirTN ])[k] = distribution_out.f[VF::LBM::DIR::ZPP]; - (dist.f[dirBS ])[kbs] = distribution_out.f[VF::LBM::DIR::ZMM]; - (dist.f[dirBN ])[kb] = distribution_out.f[VF::LBM::DIR::ZPM]; - (dist.f[dirTS ])[ks] = distribution_out.f[VF::LBM::DIR::ZMP]; - (dist.f[dirTNE ])[k] = distribution_out.f[VF::LBM::DIR::PPP]; - (dist.f[dirTNW ])[kw] = distribution_out.f[VF::LBM::DIR::MPP]; - (dist.f[dirTSE ])[ks] = distribution_out.f[VF::LBM::DIR::PMP]; - (dist.f[dirTSW ])[ksw] = distribution_out.f[VF::LBM::DIR::MMP]; - (dist.f[dirBNE ])[kb] = distribution_out.f[VF::LBM::DIR::PPM]; - (dist.f[dirBNW ])[kbw] = distribution_out.f[VF::LBM::DIR::MPM]; - (dist.f[dirBSE ])[kbs] = distribution_out.f[VF::LBM::DIR::PMM]; - (dist.f[dirBSW ])[kbsw] = distribution_out.f[VF::LBM::DIR::MMM]; - (dist.f[dirREST])[k] = distribution_out.f[VF::LBM::DIR::ZZZ]; - - } -} -//////////////////////////////////////////////////////////////////////////////// - -extern "C" __global__ void Cumulant_K17_LBM_Device_Kernel_old_origin( - real omega, - uint* typeOfGridNode, - uint* neighborX, - uint* neighborY, - uint* neighborZ, - real* distributions, - int size_Mat, - int level, - real* forces, - bool isEvenTimestep) -{ - ////////////////////////////////////////////////////////////////////////// - //! Cumulant K17 Kernel is based on \ref - //! <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017), DOI:10.1016/j.jcp.2017.05.040 ]</b></a> - //! and \ref - //! <a href="https://doi.org/10.1016/j.jcp.2017.07.004"><b>[ M. Geier et al. (2017), DOI:10.1016/j.jcp.2017.07.004 ]</b></a> - //! - //! The cumulant kernel is executed in the following steps - //! - //////////////////////////////////////////////////////////////////////////////// - //! - Get node index coordinates from thredIdx, blockIdx, blockDim and gridDim. - //! - const unsigned x = threadIdx.x; - const unsigned y = blockIdx.x; - const unsigned z = blockIdx.y; - - const unsigned nx = blockDim.x; - const unsigned ny = gridDim.x; - - const unsigned k = nx*(ny*z + y) + x; - - ////////////////////////////////////////////////////////////////////////// - // run for all indices in size_Mat and fluid nodes - if ((k < size_Mat) && (typeOfGridNode[k] == GEO_FLUID)) - { - ////////////////////////////////////////////////////////////////////////// - //! - Read distributions: style of reading and writing the distributions from/to stored arrays dependent on timestep is based on the esoteric twist algorithm \ref - //! <a href="https://doi.org/10.3390/computation5020019"><b>[ M. Geier et al. (2017), DOI:10.3390/computation5020019 ]</b></a> - //! - Distributions27 dist; - if (isEvenTimestep) - { - dist.f[dirE ] = &distributions[dirE *size_Mat]; - dist.f[dirW ] = &distributions[dirW *size_Mat]; - dist.f[dirN ] = &distributions[dirN *size_Mat]; - dist.f[dirS ] = &distributions[dirS *size_Mat]; - dist.f[dirT ] = &distributions[dirT *size_Mat]; - dist.f[dirB ] = &distributions[dirB *size_Mat]; - dist.f[dirNE ] = &distributions[dirNE *size_Mat]; - dist.f[dirSW ] = &distributions[dirSW *size_Mat]; - dist.f[dirSE ] = &distributions[dirSE *size_Mat]; - dist.f[dirNW ] = &distributions[dirNW *size_Mat]; - dist.f[dirTE ] = &distributions[dirTE *size_Mat]; - dist.f[dirBW ] = &distributions[dirBW *size_Mat]; - dist.f[dirBE ] = &distributions[dirBE *size_Mat]; - dist.f[dirTW ] = &distributions[dirTW *size_Mat]; - dist.f[dirTN ] = &distributions[dirTN *size_Mat]; - dist.f[dirBS ] = &distributions[dirBS *size_Mat]; - dist.f[dirBN ] = &distributions[dirBN *size_Mat]; - dist.f[dirTS ] = &distributions[dirTS *size_Mat]; - dist.f[dirREST] = &distributions[dirREST*size_Mat]; - dist.f[dirTNE ] = &distributions[dirTNE *size_Mat]; - dist.f[dirTSW ] = &distributions[dirTSW *size_Mat]; - dist.f[dirTSE ] = &distributions[dirTSE *size_Mat]; - dist.f[dirTNW ] = &distributions[dirTNW *size_Mat]; - dist.f[dirBNE ] = &distributions[dirBNE *size_Mat]; - dist.f[dirBSW ] = &distributions[dirBSW *size_Mat]; - dist.f[dirBSE ] = &distributions[dirBSE *size_Mat]; - dist.f[dirBNW ] = &distributions[dirBNW *size_Mat]; - } - else - { - dist.f[dirW ] = &distributions[dirE *size_Mat]; - dist.f[dirE ] = &distributions[dirW *size_Mat]; - dist.f[dirS ] = &distributions[dirN *size_Mat]; - dist.f[dirN ] = &distributions[dirS *size_Mat]; - dist.f[dirB ] = &distributions[dirT *size_Mat]; - dist.f[dirT ] = &distributions[dirB *size_Mat]; - dist.f[dirSW ] = &distributions[dirNE *size_Mat]; - dist.f[dirNE ] = &distributions[dirSW *size_Mat]; - dist.f[dirNW ] = &distributions[dirSE *size_Mat]; - dist.f[dirSE ] = &distributions[dirNW *size_Mat]; - dist.f[dirBW ] = &distributions[dirTE *size_Mat]; - dist.f[dirTE ] = &distributions[dirBW *size_Mat]; - dist.f[dirTW ] = &distributions[dirBE *size_Mat]; - dist.f[dirBE ] = &distributions[dirTW *size_Mat]; - dist.f[dirBS ] = &distributions[dirTN *size_Mat]; - dist.f[dirTN ] = &distributions[dirBS *size_Mat]; - dist.f[dirTS ] = &distributions[dirBN *size_Mat]; - dist.f[dirBN ] = &distributions[dirTS *size_Mat]; - dist.f[dirREST] = &distributions[dirREST*size_Mat]; - dist.f[dirBSW ] = &distributions[dirTNE *size_Mat]; - dist.f[dirBNE ] = &distributions[dirTSW *size_Mat]; - dist.f[dirBNW ] = &distributions[dirTSE *size_Mat]; - dist.f[dirBSE ] = &distributions[dirTNW *size_Mat]; - dist.f[dirTSW ] = &distributions[dirBNE *size_Mat]; - dist.f[dirTNE ] = &distributions[dirBSW *size_Mat]; - dist.f[dirTNW ] = &distributions[dirBSE *size_Mat]; - dist.f[dirTSE ] = &distributions[dirBNW *size_Mat]; - } - //////////////////////////////////////////////////////////////////////////////// - //! - Set neighbor indices (necessary for indirect addressing) - uint kw = neighborX[k]; - uint ks = neighborY[k]; - uint kb = neighborZ[k]; - uint ksw = neighborY[kw]; - uint kbw = neighborZ[kw]; - uint kbs = neighborZ[ks]; - uint kbsw = neighborZ[ksw]; - //////////////////////////////////////////////////////////////////////////////////// - //! - Set local distributions - //! - real mfcbb = (dist.f[dirE ])[k]; - real mfabb = (dist.f[dirW ])[kw]; - real mfbcb = (dist.f[dirN ])[k]; - real mfbab = (dist.f[dirS ])[ks]; - real mfbbc = (dist.f[dirT ])[k]; - real mfbba = (dist.f[dirB ])[kb]; - real mfccb = (dist.f[dirNE ])[k]; - real mfaab = (dist.f[dirSW ])[ksw]; - real mfcab = (dist.f[dirSE ])[ks]; - real mfacb = (dist.f[dirNW ])[kw]; - real mfcbc = (dist.f[dirTE ])[k]; - real mfaba = (dist.f[dirBW ])[kbw]; - real mfcba = (dist.f[dirBE ])[kb]; - real mfabc = (dist.f[dirTW ])[kw]; - real mfbcc = (dist.f[dirTN ])[k]; - real mfbaa = (dist.f[dirBS ])[kbs]; - real mfbca = (dist.f[dirBN ])[kb]; - real mfbac = (dist.f[dirTS ])[ks]; - real mfbbb = (dist.f[dirREST])[k]; - real mfccc = (dist.f[dirTNE ])[k]; - real mfaac = (dist.f[dirTSW ])[ksw]; - real mfcac = (dist.f[dirTSE ])[ks]; - real mfacc = (dist.f[dirTNW ])[kw]; - real mfcca = (dist.f[dirBNE ])[kb]; - real mfaaa = (dist.f[dirBSW ])[kbsw]; - real mfcaa = (dist.f[dirBSE ])[kbs]; - real mfaca = (dist.f[dirBNW ])[kbw]; - - //////////////////////////////////////////////////////////////////////////////////// - //! - Calculate density and velocity using pyramid summation for low round-off errors as in Eq. (J1)-(J3) \ref - //! <a href="https://doi.org/10.1016/j.camwa.2015.05.001"><b>[ M. Geier et al. (2015), DOI:10.1016/j.camwa 2015.05.001 ]</b></a> - //! - real drho = - ((((mfccc + mfaaa) + (mfaca + mfcac)) + ((mfacc + mfcaa) + (mfaac + mfcca))) + - (((mfbac + mfbca) + (mfbaa + mfbcc)) + ((mfabc + mfcba) + (mfaba + mfcbc)) + ((mfacb + mfcab) + (mfaab + mfccb))) + - ((mfabb + mfcbb) + (mfbab + mfbcb) + (mfbba + mfbbc))) + mfbbb; - real rho = c1o1 + drho; - real OOrho = c1o1 / rho; - real vvx = - ((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfcaa - mfacc) + (mfcca - mfaac))) + - (((mfcba - mfabc) + (mfcbc - mfaba)) + ((mfcab - mfacb) + (mfccb - mfaab))) + - (mfcbb - mfabb)) * OOrho; - real vvy = - ((((mfccc - mfaaa) + (mfaca - mfcac)) + ((mfacc - mfcaa) + (mfcca - mfaac))) + - (((mfbca - mfbac) + (mfbcc - mfbaa)) + ((mfacb - mfcab) + (mfccb - mfaab))) + - (mfbcb - mfbab)) * OOrho; - real vvz = - ((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfacc - mfcaa) + (mfaac - mfcca))) + - (((mfbac - mfbca) + (mfbcc - mfbaa)) + ((mfabc - mfcba) + (mfcbc - mfaba))) + - (mfbbc - mfbba)) * OOrho; - //////////////////////////////////////////////////////////////////////////////////// - //! - Add half of the acceleration (body force) to the velocity as in Eq. (42) \ref - //! <a href="https://doi.org/10.1016/j.camwa.2015.05.001"><b>[ M. Geier et al. (2015), DOI:10.1016/j.camwa 2015.05.001 ]</b></a> - //! - vvx += forces[0] * c1o2; - vvy += forces[1] * c1o2; - vvz += forces[2] * c1o2; - //////////////////////////////////////////////////////////////////////////////////// - // calculate the square of velocities for this lattice node - real vx2 = vvx*vvx; - real vy2 = vvy*vvy; - real vz2 = vvz*vvz; - //////////////////////////////////////////////////////////////////////////////////// - //! - Set relaxation limiters for third order cumulants to default value \f$ \lambda=0.001 \f$ according to section 6 in \ref - //! <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017), DOI:10.1016/j.jcp.2017.05 040 ]</b></a> - //! - real wadjust; - real qudricLimitP = c1o100; - real qudricLimitM = c1o100; - real qudricLimitD = c1o100; - //////////////////////////////////////////////////////////////////////////////////// - //! - Chimera transform from well conditioned distributions to central moments as defined in Appendix J in \ref - //! <a href="https://doi.org/10.1016/j.camwa.2015.05.001"><b>[ M. Geier et al. (2015), DOI:10.1016/j.camwa 2015.05.001 ]</b></a> - //! see also Eq. (6)-(14) in \ref - //! <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017), DOI:10.1016/j.jcp.2017.05 040 ]</b></a> - //! - //////////////////////////////////////////////////////////////////////////////////// - // Z - Dir - VF::LBM::forwardInverseChimeraWithK(mfaaa, mfaab, mfaac, vvz, vz2, c36o1, c1o36); - VF::LBM::forwardInverseChimeraWithK(mfaba, mfabb, mfabc, vvz, vz2, c9o1, c1o9); - VF::LBM::forwardInverseChimeraWithK(mfaca, mfacb, mfacc, vvz, vz2, c36o1, c1o36); - VF::LBM::forwardInverseChimeraWithK(mfbaa, mfbab, mfbac, vvz, vz2, c9o1, c1o9); - VF::LBM::forwardInverseChimeraWithK(mfbba, mfbbb, mfbbc, vvz, vz2, c9o4, c4o9); - VF::LBM::forwardInverseChimeraWithK(mfbca, mfbcb, mfbcc, vvz, vz2, c9o1, c1o9); - VF::LBM::forwardInverseChimeraWithK(mfcaa, mfcab, mfcac, vvz, vz2, c36o1, c1o36); - VF::LBM::forwardInverseChimeraWithK(mfcba, mfcbb, mfcbc, vvz, vz2, c9o1, c1o9); - VF::LBM::forwardInverseChimeraWithK(mfcca, mfccb, mfccc, vvz, vz2, c36o1, c1o36); - //////////////////////////////////////////////////////////////////////////////////// - // Y - Dir - VF::LBM::forwardInverseChimeraWithK(mfaaa, mfaba, mfaca, vvy, vy2, c6o1, c1o6); - VF::LBM::forwardChimera( mfaab, mfabb, mfacb, vvy, vy2); - VF::LBM::forwardInverseChimeraWithK(mfaac, mfabc, mfacc, vvy, vy2, c18o1, c1o18); - VF::LBM::forwardInverseChimeraWithK(mfbaa, mfbba, mfbca, vvy, vy2, c3o2, c2o3); - VF::LBM::forwardChimera( mfbab, mfbbb, mfbcb, vvy, vy2); - VF::LBM::forwardInverseChimeraWithK(mfbac, mfbbc, mfbcc, vvy, vy2, c9o2, c2o9); - VF::LBM::forwardInverseChimeraWithK(mfcaa, mfcba, mfcca, vvy, vy2, c6o1, c1o6); - VF::LBM::forwardChimera( mfcab, mfcbb, mfccb, vvy, vy2); - VF::LBM::forwardInverseChimeraWithK(mfcac, mfcbc, mfccc, vvy, vy2, c18o1, c1o18); - //////////////////////////////////////////////////////////////////////////////////// - // X - Dir - VF::LBM::forwardInverseChimeraWithK(mfaaa, mfbaa, mfcaa, vvx, vx2, c1o1, c1o1); - VF::LBM::forwardChimera( mfaba, mfbba, mfcba, vvx, vx2); - VF::LBM::forwardInverseChimeraWithK(mfaca, mfbca, mfcca, vvx, vx2, c3o1, c1o3); - VF::LBM::forwardChimera( mfaab, mfbab, mfcab, vvx, vx2); - VF::LBM::forwardChimera( mfabb, mfbbb, mfcbb, vvx, vx2); - VF::LBM::forwardChimera( mfacb, mfbcb, mfccb, vvx, vx2); - VF::LBM::forwardInverseChimeraWithK(mfaac, mfbac, mfcac, vvx, vx2, c3o1, c1o3); - VF::LBM::forwardChimera( mfabc, mfbbc, mfcbc, vvx, vx2); - VF::LBM::forwardInverseChimeraWithK(mfacc, mfbcc, mfccc, vvx, vx2, c3o1, c1o9); - //////////////////////////////////////////////////////////////////////////////////// - //! - Setting relaxation rates for non-hydrodynamic cumulants (default values). Variable names and equations according to - //! <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017), DOI:10.1016/j.jcp.2017.05 040 ]</b></a> - //! => [NAME IN PAPER]=[NAME IN CODE]=[DEFAULT VALUE]. - //! - Trace of second order cumulants \f$ C_{200}+C_{020}+C_{002} \f$ used to adjust bulk viscosity:\f$\omega_2=OxxPyyPzz=1.0 \f$. - //! - Third order cumulants \f$ C_{120}+C_{102}, C_{210}+C_{012}, C_{201}+C_{021} \f$: \f$ \omega_3=OxyyPxzz \f$ set according to Eq. (111) with simplifications assuming \f$ \omega_2=1.0\f$. - //! - Third order cumulants \f$ C_{120}-C_{102}, C_{210}-C_{012}, C_{201}-C_{021} \f$: \f$ \omega_4 = OxyyMxzz \f$ set according to Eq. (112) with simplifications assuming \f$ \omega_2 = 1.0\f$. - //! - Third order cumulants \f$ C_{111} \f$: \f$ \omega_5 = Oxyz \f$ set according to Eq. (113) with simplifications assuming \f$ \omega_2 = 1.0\f$ (modify for different bulk viscosity). - //! - Fourth order cumulants \f$ C_{220}, C_{202}, C_{022}, C_{211}, C_{121}, C_{112} \f$: for simplification all set to the same default value \f$ \omega_6=\omega_7=\omega_8=O4=1.0 \f$. - //! - Fifth order cumulants \f$ C_{221}, C_{212}, C_{122}\f$: \f$\omega_9=O5=1.0\f$. - //! - Sixth order cumulant \f$ C_{222}\f$: \f$\omega_{10}=O6=1.0\f$. - //! - //////////////////////////////////////////////////////////// - //2. - real OxxPyyPzz = c1o1; - //////////////////////////////////////////////////////////// - //3. - real OxyyPxzz = c8o1 * (-c2o1 + omega) * ( c1o1 + c2o1*omega) / (-c8o1 - c14o1*omega + c7o1*omega*omega); - real OxyyMxzz = c8o1 * (-c2o1 + omega) * (-c7o1 + c4o1*omega) / (c56o1 - c50o1*omega + c9o1*omega*omega); - real Oxyz = c24o1 * (-c2o1 + omega) * (-c2o1 - c7o1*omega + c3o1*omega*omega) / (c48o1 + c152o1*omega - c130o1*omega*omega + c29o1*omega*omega*omega); - //////////////////////////////////////////////////////////// - //4. - real O4 = c1o1; - //////////////////////////////////////////////////////////// - //5. - real O5 = c1o1; - //////////////////////////////////////////////////////////// - //6. - real O6 = c1o1; - //////////////////////////////////////////////////////////////////////////////////// - //! - A and B: parameters for fourth order convergence of the diffusion term according to Eq. (114) and (115) - //! <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017), DOI:10.1016/j.jcp.2017.05 040 ]</b></a> - //! with simplifications assuming \f$ \omega_2 = 1.0 \f$ (modify for different bulk viscosity). - //! - real A = (c4o1 + c2o1*omega - c3o1*omega*omega) / (c2o1 - c7o1*omega + c5o1*omega*omega); - real B = (c4o1 + c28o1*omega - c14o1*omega*omega) / (c6o1 - c21o1*omega + c15o1*omega*omega); - //////////////////////////////////////////////////////////////////////////////////// - //! - Compute cumulants from central moments according to Eq. (20)-(23) in - //! <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017), DOI:10.1016/j.jcp.2017.05 040 ]</b></a> - //! - //////////////////////////////////////////////////////////// - //4. - real CUMcbb = mfcbb - ((mfcaa + c1o3) * mfabb + c2o1 * mfbba * mfbab) * OOrho; - real CUMbcb = mfbcb - ((mfaca + c1o3) * mfbab + c2o1 * mfbba * mfabb) * OOrho; - real CUMbbc = mfbbc - ((mfaac + c1o3) * mfbba + c2o1 * mfbab * mfabb) * OOrho; - real CUMcca = mfcca - (((mfcaa * mfaca + c2o1 * mfbba * mfbba) + c1o3 * (mfcaa + mfaca)) * OOrho - c1o9*(drho * OOrho)); - real CUMcac = mfcac - (((mfcaa * mfaac + c2o1 * mfbab * mfbab) + c1o3 * (mfcaa + mfaac)) * OOrho - c1o9*(drho * OOrho)); - real CUMacc = mfacc - (((mfaac * mfaca + c2o1 * mfabb * mfabb) + c1o3 * (mfaac + mfaca)) * OOrho - c1o9*(drho * OOrho)); - //////////////////////////////////////////////////////////// - //5. - real CUMbcc = mfbcc - ((mfaac * mfbca + mfaca * mfbac + c4o1 * mfabb * mfbbb + c2o1 * (mfbab * mfacb + mfbba * mfabc)) + c1o3 * (mfbca + mfbac)) * OOrho; - real CUMcbc = mfcbc - ((mfaac * mfcba + mfcaa * mfabc + c4o1 * mfbab * mfbbb + c2o1 * (mfabb * mfcab + mfbba * mfbac)) + c1o3 * (mfcba + mfabc)) * OOrho; - real CUMccb = mfccb - ((mfcaa * mfacb + mfaca * mfcab + c4o1 * mfbba * mfbbb + c2o1 * (mfbab * mfbca + mfabb * mfcba)) + c1o3 * (mfacb + mfcab)) * OOrho; - //////////////////////////////////////////////////////////// - //6. - real CUMccc = mfccc + ((-c4o1 * mfbbb * mfbbb - - (mfcaa * mfacc + mfaca * mfcac + mfaac * mfcca) - - c4o1 * (mfabb * mfcbb + mfbab * mfbcb + mfbba * mfbbc) - - c2o1 * (mfbca * mfbac + mfcba * mfabc + mfcab * mfacb)) * OOrho - + (c4o1 * (mfbab * mfbab * mfaca + mfabb * mfabb * mfcaa + mfbba * mfbba * mfaac) - + c2o1 * (mfcaa * mfaca * mfaac) - + c16o1 * mfbba * mfbab * mfabb) * OOrho * OOrho - - c1o3 * (mfacc + mfcac + mfcca) * OOrho - - c1o9 * (mfcaa + mfaca + mfaac) * OOrho - + (c2o1 * (mfbab * mfbab + mfabb * mfabb + mfbba * mfbba) - + (mfaac * mfaca + mfaac * mfcaa + mfaca * mfcaa) + c1o3 *(mfaac + mfaca + mfcaa)) * OOrho * OOrho * c2o3 - + c1o27*((drho * drho - drho) * OOrho * OOrho)); - //////////////////////////////////////////////////////////////////////////////////// - //! - Compute linear combinations of second and third order cumulants - //! - //////////////////////////////////////////////////////////// - //2. - real mxxPyyPzz = mfcaa + mfaca + mfaac; - real mxxMyy = mfcaa - mfaca; - real mxxMzz = mfcaa - mfaac; - //////////////////////////////////////////////////////////// - //3. - real mxxyPyzz = mfcba + mfabc; - real mxxyMyzz = mfcba - mfabc; - real mxxzPyyz = mfcab + mfacb; - real mxxzMyyz = mfcab - mfacb; - real mxyyPxzz = mfbca + mfbac; - real mxyyMxzz = mfbca - mfbac; - //////////////////////////////////////////////////////////////////////////////////// - //incl. correction - //////////////////////////////////////////////////////////// - //! - Compute velocity gradients from second order cumulants according to Eq. (27)-(32) - //! <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017), DOI:10.1016/j.jcp.2017.05 040 ]</b></a> - //! Further explanations of the correction in viscosity in Appendix H of - //! <a href="https://doi.org/10.1016/j.camwa.2015.05.001"><b>[ M. Geier et al. (2015), DOI:10.1016/j.camwa 2015.05.001 ]</b></a> - //! Note that the division by rho is omitted here as we need rho times the gradients later. - //! - real Dxy = -c3o1*omega*mfbba; - real Dxz = -c3o1*omega*mfbab; - real Dyz = -c3o1*omega*mfabb; - real dxux = c1o2 * (-omega) *(mxxMyy + mxxMzz) + c1o2 * OxxPyyPzz * (mfaaa - mxxPyyPzz); - real dyuy = dxux + omega * c3o2 * mxxMyy; - real dzuz = dxux + omega * c3o2 * mxxMzz; - //////////////////////////////////////////////////////////// - //! - Relaxation of second order cumulants with correction terms according to Eq. (33)-(35) in - //! <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017), DOI:10.1016/j.jcp.2017.05 040 ]</b></a> - //! - mxxPyyPzz += OxxPyyPzz*(mfaaa - mxxPyyPzz) - c3o1 * (c1o1 - c1o2 * OxxPyyPzz) * (vx2 * dxux + vy2 * dyuy + vz2 * dzuz); - mxxMyy += omega * (-mxxMyy) - c3o1 * (c1o1 + c1o2 * (-omega)) * (vx2 * dxux - vy2 * dyuy); - mxxMzz += omega * (-mxxMzz) - c3o1 * (c1o1 + c1o2 * (-omega)) * (vx2 * dxux - vz2 * dzuz); - //////////////////////////////////////////////////////////////////////////////////// - ////no correction - //mxxPyyPzz += OxxPyyPzz*(mfaaa - mxxPyyPzz); - //mxxMyy += -(-omega) * (-mxxMyy); - //mxxMzz += -(-omega) * (-mxxMzz); - ////////////////////////////////////////////////////////////////////////// - mfabb += omega * (-mfabb); - mfbab += omega * (-mfbab); - mfbba += omega * (-mfbba); - //////////////////////////////////////////////////////////////////////////////////// - //relax - ////////////////////////////////////////////////////////////////////////// - // incl. limiter - //! - Relaxation of third order cumulants including limiter according to Eq. (116)-(123) - //! <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017), DOI:10.1016/j.jcp.2017.05 040 ]</b></a> - //! - wadjust = Oxyz + (c1o1 - Oxyz)*abs(mfbbb) / (abs(mfbbb) + qudricLimitD); - mfbbb += wadjust * (-mfbbb); - wadjust = OxyyPxzz + (c1o1 - OxyyPxzz)*abs(mxxyPyzz) / (abs(mxxyPyzz) + qudricLimitP); - mxxyPyzz += wadjust * (-mxxyPyzz); - wadjust = OxyyMxzz + (c1o1 - OxyyMxzz)*abs(mxxyMyzz) / (abs(mxxyMyzz) + qudricLimitM); - mxxyMyzz += wadjust * (-mxxyMyzz); - wadjust = OxyyPxzz + (c1o1 - OxyyPxzz)*abs(mxxzPyyz) / (abs(mxxzPyyz) + qudricLimitP); - mxxzPyyz += wadjust * (-mxxzPyyz); - wadjust = OxyyMxzz + (c1o1 - OxyyMxzz)*abs(mxxzMyyz) / (abs(mxxzMyyz) + qudricLimitM); - mxxzMyyz += wadjust * (-mxxzMyyz); - wadjust = OxyyPxzz + (c1o1 - OxyyPxzz)*abs(mxyyPxzz) / (abs(mxyyPxzz) + qudricLimitP); - mxyyPxzz += wadjust * (-mxyyPxzz); - wadjust = OxyyMxzz + (c1o1 - OxyyMxzz)*abs(mxyyMxzz) / (abs(mxyyMxzz) + qudricLimitM); - mxyyMxzz += wadjust * (-mxyyMxzz); - ////////////////////////////////////////////////////////////////////////// - // no limiter - //mfbbb += OxyyMxzz * (-mfbbb); - //mxxyPyzz += OxyyPxzz * (-mxxyPyzz); - //mxxyMyzz += OxyyMxzz * (-mxxyMyzz); - //mxxzPyyz += OxyyPxzz * (-mxxzPyyz); - //mxxzMyyz += OxyyMxzz * (-mxxzMyyz); - //mxyyPxzz += OxyyPxzz * (-mxyyPxzz); - //mxyyMxzz += OxyyMxzz * (-mxyyMxzz); - //////////////////////////////////////////////////////////////////////////////////// - //! - Compute inverse linear combinations of second and third order cumulants - //! - mfcaa = c1o3 * (mxxMyy + mxxMzz + mxxPyyPzz); - mfaca = c1o3 * (-c2o1* mxxMyy + mxxMzz + mxxPyyPzz); - mfaac = c1o3 * (mxxMyy - c2o1* mxxMzz + mxxPyyPzz); - mfcba = ( mxxyMyzz + mxxyPyzz) * c1o2; - mfabc = (-mxxyMyzz + mxxyPyzz) * c1o2; - mfcab = ( mxxzMyyz + mxxzPyyz) * c1o2; - mfacb = (-mxxzMyyz + mxxzPyyz) * c1o2; - mfbca = ( mxyyMxzz + mxyyPxzz) * c1o2; - mfbac = (-mxyyMxzz + mxyyPxzz) * c1o2; - ////////////////////////////////////////////////////////////////////////// - ////////////////////////////////////////////////////////////////////////// - //4. - // no limiter - //! - Relax fourth order cumulants to modified equilibrium for fourth order convergence of diffusion according to Eq. (43)-(48) - //! <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017), DOI:10.1016/j.jcp.2017.05 040 ]</b></a> - //! - CUMacc = -O4*(c1o1 / omega - c1o2) * (dyuy + dzuz) * c2o3 * A + (c1o1 - O4) * (CUMacc); - CUMcac = -O4*(c1o1 / omega - c1o2) * (dxux + dzuz) * c2o3 * A + (c1o1 - O4) * (CUMcac); - CUMcca = -O4*(c1o1 / omega - c1o2) * (dyuy + dxux) * c2o3 * A + (c1o1 - O4) * (CUMcca); - CUMbbc = -O4*(c1o1 / omega - c1o2) * Dxy * c1o3 * B + (c1o1 - O4) * (CUMbbc); - CUMbcb = -O4*(c1o1 / omega - c1o2) * Dxz * c1o3 * B + (c1o1 - O4) * (CUMbcb); - CUMcbb = -O4*(c1o1 / omega - c1o2) * Dyz * c1o3 * B + (c1o1 - O4) * (CUMcbb); - ////////////////////////////////////////////////////////////////////////// - //5. - CUMbcc += O5 * (-CUMbcc); - CUMcbc += O5 * (-CUMcbc); - CUMccb += O5 * (-CUMccb); - ////////////////////////////////////////////////////////////////////////// - //6. - CUMccc += O6 * (-CUMccc); - //////////////////////////////////////////////////////////////////////////////////// - //! - Compute central moments from post collision cumulants according to Eq. (53)-(56) in - //! <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017), DOI:10.1016/j.jcp.2017.05 040 ]</b></a> - //! - ////////////////////////////////////////////////////////////////////////// - //4. - mfcbb = CUMcbb + c1o3*((c3o1*mfcaa + c1o1) * mfabb + c6o1 * mfbba * mfbab) * OOrho; - mfbcb = CUMbcb + c1o3*((c3o1*mfaca + c1o1) * mfbab + c6o1 * mfbba * mfabb) * OOrho; - mfbbc = CUMbbc + c1o3*((c3o1*mfaac + c1o1) * mfbba + c6o1 * mfbab * mfabb) * OOrho; - mfcca = CUMcca + (((mfcaa * mfaca + c2o1 * mfbba * mfbba)*c9o1 + c3o1 * (mfcaa + mfaca)) * OOrho - (drho * OOrho))*c1o9; - mfcac = CUMcac + (((mfcaa * mfaac + c2o1 * mfbab * mfbab)*c9o1 + c3o1 * (mfcaa + mfaac)) * OOrho - (drho * OOrho))*c1o9; - mfacc = CUMacc + (((mfaac * mfaca + c2o1 * mfabb * mfabb)*c9o1 + c3o1 * (mfaac + mfaca)) * OOrho - (drho * OOrho))*c1o9; - ////////////////////////////////////////////////////////////////////////// - //5. - mfbcc = CUMbcc + c1o3 *(c3o1*(mfaac * mfbca + mfaca * mfbac + c4o1 * mfabb * mfbbb + c2o1 * (mfbab * mfacb + mfbba * mfabc)) + (mfbca + mfbac)) * OOrho; - mfcbc = CUMcbc + c1o3 *(c3o1*(mfaac * mfcba + mfcaa * mfabc + c4o1 * mfbab * mfbbb + c2o1 * (mfabb * mfcab + mfbba * mfbac)) + (mfcba + mfabc)) * OOrho; - mfccb = CUMccb + c1o3 *(c3o1*(mfcaa * mfacb + mfaca * mfcab + c4o1 * mfbba * mfbbb + c2o1 * (mfbab * mfbca + mfabb * mfcba)) + (mfacb + mfcab)) * OOrho; - ////////////////////////////////////////////////////////////////////////// - //6. - mfccc = CUMccc - ((-c4o1 * mfbbb * mfbbb - - (mfcaa * mfacc + mfaca * mfcac + mfaac * mfcca) - - c4o1 * (mfabb * mfcbb + mfbab * mfbcb + mfbba * mfbbc) - - c2o1 * (mfbca * mfbac + mfcba * mfabc + mfcab * mfacb)) * OOrho - + (c4o1 * (mfbab * mfbab * mfaca + mfabb * mfabb * mfcaa + mfbba * mfbba * mfaac) - + c2o1 * (mfcaa * mfaca * mfaac) - + c16o1 * mfbba * mfbab * mfabb) * OOrho * OOrho - - c1o3 * (mfacc + mfcac + mfcca) * OOrho - - c1o9 * (mfcaa + mfaca + mfaac) * OOrho - + (c2o1 * (mfbab * mfbab + mfabb * mfabb + mfbba * mfbba) - + (mfaac * mfaca + mfaac * mfcaa + mfaca * mfcaa) + c1o3 *(mfaac + mfaca + mfcaa)) * OOrho * OOrho * c2o3 - + c1o27*((drho * drho - drho) * OOrho * OOrho)); - //////////////////////////////////////////////////////////////////////////////////// - //! - Add acceleration (body force) to first order cumulants according to Eq. (85)-(87) in - //! <a href="https://doi.org/10.1016/j.camwa.2015.05.001"><b>[ M. Geier et al. (2015), DOI:10.1016/j.camwa 2015.05.001 ]</b></a> - //! - mfbaa = -mfbaa; - mfaba = -mfaba; - mfaab = -mfaab; - //////////////////////////////////////////////////////////////////////////////////// - //! - Chimera transform from central moments to well conditioned distributions as defined in Appendix J in - //! <a href="https://doi.org/10.1016/j.camwa.2015.05.001"><b>[ M. Geier et al. (2015), DOI:10.1016/j.camwa 2015.05.001 ]</b></a> - //! see also Eq. (88)-(96) in - //! <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017), DOI:10.1016/j.jcp.2017.05 040 ]</b></a> - //! - //////////////////////////////////////////////////////////////////////////////////// - // X - Dir - VF::LBM::backwardInverseChimeraWithK(mfaaa, mfbaa, mfcaa, vvx, vx2, c1o1, c1o1); - VF::LBM::backwardChimera( mfaba, mfbba, mfcba, vvx, vx2); - VF::LBM::backwardInverseChimeraWithK(mfaca, mfbca, mfcca, vvx, vx2, c3o1, c1o3); - VF::LBM::backwardChimera( mfaab, mfbab, mfcab, vvx, vx2); - VF::LBM::backwardChimera( mfabb, mfbbb, mfcbb, vvx, vx2); - VF::LBM::backwardChimera( mfacb, mfbcb, mfccb, vvx, vx2); - VF::LBM::backwardInverseChimeraWithK(mfaac, mfbac, mfcac, vvx, vx2, c3o1, c1o3); - VF::LBM::backwardChimera( mfabc, mfbbc, mfcbc, vvx, vx2); - VF::LBM::backwardInverseChimeraWithK(mfacc, mfbcc, mfccc, vvx, vx2, c9o1, c1o9); - //////////////////////////////////////////////////////////////////////////////////// - // Y - Dir - VF::LBM::backwardInverseChimeraWithK(mfaaa, mfaba, mfaca, vvy, vy2, c6o1, c1o6); - VF::LBM::backwardChimera( mfaab, mfabb, mfacb, vvy, vy2); - VF::LBM::backwardInverseChimeraWithK(mfaac, mfabc, mfacc, vvy, vy2, c18o1, c1o18); - VF::LBM::backwardInverseChimeraWithK(mfbaa, mfbba, mfbca, vvy, vy2, c3o2, c2o3); - VF::LBM::backwardChimera( mfbab, mfbbb, mfbcb, vvy, vy2); - VF::LBM::backwardInverseChimeraWithK(mfbac, mfbbc, mfbcc, vvy, vy2, c9o2, c2o9); - VF::LBM::backwardInverseChimeraWithK(mfcaa, mfcba, mfcca, vvy, vy2, c6o1, c1o6); - VF::LBM::backwardChimera( mfcab, mfcbb, mfccb, vvy, vy2); - VF::LBM::backwardInverseChimeraWithK(mfcac, mfcbc, mfccc, vvy, vy2, c18o1, c1o18); - //////////////////////////////////////////////////////////////////////////////////// - // Z - Dir - VF::LBM::backwardInverseChimeraWithK(mfaaa, mfaab, mfaac, vvz, vz2, c36o1, c1o36); - VF::LBM::backwardInverseChimeraWithK(mfaba, mfabb, mfabc, vvz, vz2, c9o1, c1o9); - VF::LBM::backwardInverseChimeraWithK(mfaca, mfacb, mfacc, vvz, vz2, c36o1, c1o36); - VF::LBM::backwardInverseChimeraWithK(mfbaa, mfbab, mfbac, vvz, vz2, c9o1, c1o9); - VF::LBM::backwardInverseChimeraWithK(mfbba, mfbbb, mfbbc, vvz, vz2, c9o4, c4o9); - VF::LBM::backwardInverseChimeraWithK(mfbca, mfbcb, mfbcc, vvz, vz2, c9o1, c1o9); - VF::LBM::backwardInverseChimeraWithK(mfcaa, mfcab, mfcac, vvz, vz2, c36o1, c1o36); - VF::LBM::backwardInverseChimeraWithK(mfcba, mfcbb, mfcbc, vvz, vz2, c9o1, c1o9); - VF::LBM::backwardInverseChimeraWithK(mfcca, mfccb, mfccc, vvz, vz2, c36o1, c1o36); - - //////////////////////////////////////////////////////////////////////////////////// - //! - Write distributions: style of reading and writing the distributions from/to - //! stored arrays dependent on timestep is based on the esoteric twist algorithm - //! <a href="https://doi.org/10.3390/computation5020019"><b>[ M. Geier et al. (2017), DOI:10.3390/computation5020019 ]</b></a> - //! - (dist.f[dirE ])[k ] = mfabb; - (dist.f[dirW ])[kw ] = mfcbb; - (dist.f[dirN ])[k ] = mfbab; - (dist.f[dirS ])[ks ] = mfbcb; - (dist.f[dirT ])[k ] = mfbba; - (dist.f[dirB ])[kb ] = mfbbc; - (dist.f[dirNE ])[k ] = mfaab; - (dist.f[dirSW ])[ksw ] = mfccb; - (dist.f[dirSE ])[ks ] = mfacb; - (dist.f[dirNW ])[kw ] = mfcab; - (dist.f[dirTE ])[k ] = mfaba; - (dist.f[dirBW ])[kbw ] = mfcbc; - (dist.f[dirBE ])[kb ] = mfabc; - (dist.f[dirTW ])[kw ] = mfcba; - (dist.f[dirTN ])[k ] = mfbaa; - (dist.f[dirBS ])[kbs ] = mfbcc; - (dist.f[dirBN ])[kb ] = mfbac; - (dist.f[dirTS ])[ks ] = mfbca; - (dist.f[dirREST])[k ] = mfbbb; - (dist.f[dirTNE ])[k ] = mfaaa; - (dist.f[dirTSE ])[ks ] = mfaca; - (dist.f[dirBNE ])[kb ] = mfaac; - (dist.f[dirBSE ])[kbs ] = mfacc; - (dist.f[dirTNW ])[kw ] = mfcaa; - (dist.f[dirTSW ])[ksw ] = mfcca; - (dist.f[dirBNW ])[kbw ] = mfcac; - (dist.f[dirBSW ])[kbsw] = mfccc; - } -} -//////////////////////////////////////////////////////////////////////////////// - //////////////////////////////////////////////////////////////////////////////// diff --git a/src/gpu/VirtualFluids_GPU/GPU/CumulantK17ChimeraKernel.cu b/src/gpu/VirtualFluids_GPU/GPU/CumulantK17ChimeraKernel.cu new file mode 100644 index 000000000..be940fd37 --- /dev/null +++ b/src/gpu/VirtualFluids_GPU/GPU/CumulantK17ChimeraKernel.cu @@ -0,0 +1,742 @@ +//======================================================================================= +// ____ ____ __ ______ __________ __ __ __ __ +// \ \ | | | | | _ \ |___ ___| | | | | / \ | | +// \ \ | | | | | |_) | | | | | | | / \ | | +// \ \ | | | | | _ / | | | | | | / /\ \ | | +// \ \ | | | | | | \ \ | | | \__/ | / ____ \ | |____ +// \ \ | | |__| |__| \__\ |__| \________/ /__/ \__\ |_______| +// \ \ | | ________________________________________________________________ +// \ \ | | | ______________________________________________________________| +// \ \| | | | __ __ __ __ ______ _______ +// \ | | |_____ | | | | | | | | | _ \ / _____) +// \ | | _____| | | | | | | | | | | \ \ \_______ +// \ | | | | |_____ | \_/ | | | | |_/ / _____ | +// \ _____| |__| |________| \_______/ |__| |______/ (_______/ +// +// This file is part of VirtualFluids. VirtualFluids is free software: you can +// redistribute it and/or modify it under the terms of the GNU General Public +// License as published by the Free Software Foundation, either version 3 of +// the License, or (at your option) any later version. +// +// VirtualFluids is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// for more details. +// +// You should have received a copy of the GNU General Public License along +// with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>. +// +//! \file Cumulant27chim.cu +//! \ingroup GPU +//! \author Martin Schoenherr +//======================================================================================= +/* Device code */ +#include "LBM/LB.h" +#include "LBM/D3Q27.h" +#include "Core/RealConstants.h" +#include "math.h" + + +#include <lbm/CumulantChimeraPreCompiled.h> + + +__device__ Distributions27 getDistributions27(real* distributions, unsigned int size_Mat, bool isEvenTimestep) +{ + Distributions27 dist; + if (isEvenTimestep) + { + dist.f[dirE ] = &distributions[dirE *size_Mat]; + dist.f[dirW ] = &distributions[dirW *size_Mat]; + dist.f[dirN ] = &distributions[dirN *size_Mat]; + dist.f[dirS ] = &distributions[dirS *size_Mat]; + dist.f[dirT ] = &distributions[dirT *size_Mat]; + dist.f[dirB ] = &distributions[dirB *size_Mat]; + dist.f[dirNE ] = &distributions[dirNE *size_Mat]; + dist.f[dirSW ] = &distributions[dirSW *size_Mat]; + dist.f[dirSE ] = &distributions[dirSE *size_Mat]; + dist.f[dirNW ] = &distributions[dirNW *size_Mat]; + dist.f[dirTE ] = &distributions[dirTE *size_Mat]; + dist.f[dirBW ] = &distributions[dirBW *size_Mat]; + dist.f[dirBE ] = &distributions[dirBE *size_Mat]; + dist.f[dirTW ] = &distributions[dirTW *size_Mat]; + dist.f[dirTN ] = &distributions[dirTN *size_Mat]; + dist.f[dirBS ] = &distributions[dirBS *size_Mat]; + dist.f[dirBN ] = &distributions[dirBN *size_Mat]; + dist.f[dirTS ] = &distributions[dirTS *size_Mat]; + dist.f[dirREST] = &distributions[dirREST*size_Mat]; + dist.f[dirTNE ] = &distributions[dirTNE *size_Mat]; + dist.f[dirTSW ] = &distributions[dirTSW *size_Mat]; + dist.f[dirTSE ] = &distributions[dirTSE *size_Mat]; + dist.f[dirTNW ] = &distributions[dirTNW *size_Mat]; + dist.f[dirBNE ] = &distributions[dirBNE *size_Mat]; + dist.f[dirBSW ] = &distributions[dirBSW *size_Mat]; + dist.f[dirBSE ] = &distributions[dirBSE *size_Mat]; + dist.f[dirBNW ] = &distributions[dirBNW *size_Mat]; + } + else + { + dist.f[dirW ] = &distributions[dirE *size_Mat]; + dist.f[dirE ] = &distributions[dirW *size_Mat]; + dist.f[dirS ] = &distributions[dirN *size_Mat]; + dist.f[dirN ] = &distributions[dirS *size_Mat]; + dist.f[dirB ] = &distributions[dirT *size_Mat]; + dist.f[dirT ] = &distributions[dirB *size_Mat]; + dist.f[dirSW ] = &distributions[dirNE *size_Mat]; + dist.f[dirNE ] = &distributions[dirSW *size_Mat]; + dist.f[dirNW ] = &distributions[dirSE *size_Mat]; + dist.f[dirSE ] = &distributions[dirNW *size_Mat]; + dist.f[dirBW ] = &distributions[dirTE *size_Mat]; + dist.f[dirTE ] = &distributions[dirBW *size_Mat]; + dist.f[dirTW ] = &distributions[dirBE *size_Mat]; + dist.f[dirBE ] = &distributions[dirTW *size_Mat]; + dist.f[dirBS ] = &distributions[dirTN *size_Mat]; + dist.f[dirTN ] = &distributions[dirBS *size_Mat]; + dist.f[dirTS ] = &distributions[dirBN *size_Mat]; + dist.f[dirBN ] = &distributions[dirTS *size_Mat]; + dist.f[dirREST] = &distributions[dirREST*size_Mat]; + dist.f[dirBSW ] = &distributions[dirTNE *size_Mat]; + dist.f[dirBNE ] = &distributions[dirTSW *size_Mat]; + dist.f[dirBNW ] = &distributions[dirTSE *size_Mat]; + dist.f[dirBSE ] = &distributions[dirTNW *size_Mat]; + dist.f[dirTSW ] = &distributions[dirBNE *size_Mat]; + dist.f[dirTNE ] = &distributions[dirBSW *size_Mat]; + dist.f[dirTNW ] = &distributions[dirBSE *size_Mat]; + dist.f[dirTSE ] = &distributions[dirBNW *size_Mat]; + } + return dist; +} + +struct DistributionWrapper +{ + __device__ DistributionWrapper( + real* distributions, + unsigned int size_Mat, + bool isEvenTimestep, + uint k, + uint* neighborX, + uint* neighborY, + uint* neighborZ) : + dist(getDistributions27(distributions, size_Mat, isEvenTimestep)), + k(k), + kw (neighborX[k]), + ks (neighborY[k]), + kb (neighborZ[k]), + ksw (neighborY[kw]), + kbw (neighborZ[kw]), + kbs (neighborZ[ks]), + kbsw(neighborZ[ksw]) + { + read(); + } + + __device__ void read() + { + distribution.f[VF::LBM::DIR::PZZ] = (dist.f[dirE ])[k]; + distribution.f[VF::LBM::DIR::MZZ] = (dist.f[dirW ])[kw]; + distribution.f[VF::LBM::DIR::ZPZ] = (dist.f[dirN ])[k]; + distribution.f[VF::LBM::DIR::ZMZ] = (dist.f[dirS ])[ks]; + distribution.f[VF::LBM::DIR::ZZP] = (dist.f[dirT ])[k]; + distribution.f[VF::LBM::DIR::ZZM] = (dist.f[dirB ])[kb]; + distribution.f[VF::LBM::DIR::PPZ] = (dist.f[dirNE ])[k]; + distribution.f[VF::LBM::DIR::MMZ] = (dist.f[dirSW ])[ksw]; + distribution.f[VF::LBM::DIR::PMZ] = (dist.f[dirSE ])[ks]; + distribution.f[VF::LBM::DIR::MPZ] = (dist.f[dirNW ])[kw]; + distribution.f[VF::LBM::DIR::PZP] = (dist.f[dirTE ])[k]; + distribution.f[VF::LBM::DIR::MZM] = (dist.f[dirBW ])[kbw]; + distribution.f[VF::LBM::DIR::PZM] = (dist.f[dirBE ])[kb]; + distribution.f[VF::LBM::DIR::MZP] = (dist.f[dirTW ])[kw]; + distribution.f[VF::LBM::DIR::ZPP] = (dist.f[dirTN ])[k]; + distribution.f[VF::LBM::DIR::ZMM] = (dist.f[dirBS ])[kbs]; + distribution.f[VF::LBM::DIR::ZPM] = (dist.f[dirBN ])[kb]; + distribution.f[VF::LBM::DIR::ZMP] = (dist.f[dirTS ])[ks]; + distribution.f[VF::LBM::DIR::PPP] = (dist.f[dirTNE ])[k]; + distribution.f[VF::LBM::DIR::MPP] = (dist.f[dirTNW ])[kw]; + distribution.f[VF::LBM::DIR::PMP] = (dist.f[dirTSE ])[ks]; + distribution.f[VF::LBM::DIR::MMP] = (dist.f[dirTSW ])[ksw]; + distribution.f[VF::LBM::DIR::PPM] = (dist.f[dirBNE ])[kb]; + distribution.f[VF::LBM::DIR::MPM] = (dist.f[dirBNW ])[kbw]; + distribution.f[VF::LBM::DIR::PMM] = (dist.f[dirBSE ])[kbs]; + distribution.f[VF::LBM::DIR::MMM] = (dist.f[dirBSW ])[kbsw]; + distribution.f[VF::LBM::DIR::ZZZ] = (dist.f[dirREST])[k]; + } + + __device__ void write() + { + (dist.f[dirE ])[k] = distribution.f[VF::LBM::DIR::PZZ]; + (dist.f[dirW ])[kw] = distribution.f[VF::LBM::DIR::MZZ]; + (dist.f[dirN ])[k] = distribution.f[VF::LBM::DIR::ZPZ]; + (dist.f[dirS ])[ks] = distribution.f[VF::LBM::DIR::ZMZ]; + (dist.f[dirT ])[k] = distribution.f[VF::LBM::DIR::ZZP]; + (dist.f[dirB ])[kb] = distribution.f[VF::LBM::DIR::ZZM]; + (dist.f[dirNE ])[k] = distribution.f[VF::LBM::DIR::PPZ]; + (dist.f[dirSW ])[ksw] = distribution.f[VF::LBM::DIR::MMZ]; + (dist.f[dirSE ])[ks] = distribution.f[VF::LBM::DIR::PMZ]; + (dist.f[dirNW ])[kw] = distribution.f[VF::LBM::DIR::MPZ]; + (dist.f[dirTE ])[k] = distribution.f[VF::LBM::DIR::PZP]; + (dist.f[dirBW ])[kbw] = distribution.f[VF::LBM::DIR::MZM]; + (dist.f[dirBE ])[kb] = distribution.f[VF::LBM::DIR::PZM]; + (dist.f[dirTW ])[kw] = distribution.f[VF::LBM::DIR::MZP]; + (dist.f[dirTN ])[k] = distribution.f[VF::LBM::DIR::ZPP]; + (dist.f[dirBS ])[kbs] = distribution.f[VF::LBM::DIR::ZMM]; + (dist.f[dirBN ])[kb] = distribution.f[VF::LBM::DIR::ZPM]; + (dist.f[dirTS ])[ks] = distribution.f[VF::LBM::DIR::ZMP]; + (dist.f[dirTNE ])[k] = distribution.f[VF::LBM::DIR::PPP]; + (dist.f[dirTNW ])[kw] = distribution.f[VF::LBM::DIR::MPP]; + (dist.f[dirTSE ])[ks] = distribution.f[VF::LBM::DIR::PMP]; + (dist.f[dirTSW ])[ksw] = distribution.f[VF::LBM::DIR::MMP]; + (dist.f[dirBNE ])[kb] = distribution.f[VF::LBM::DIR::PPM]; + (dist.f[dirBNW ])[kbw] = distribution.f[VF::LBM::DIR::MPM]; + (dist.f[dirBSE ])[kbs] = distribution.f[VF::LBM::DIR::PMM]; + (dist.f[dirBSW ])[kbsw] = distribution.f[VF::LBM::DIR::MMM]; + (dist.f[dirREST])[k] = distribution.f[VF::LBM::DIR::ZZZ]; + } + + Distributions27 dist; + + VF::LBM::Distribution27 distribution; + + const uint k; + const uint kw; + const uint ks; + const uint kb; + const uint ksw; + const uint kbw; + const uint kbs; + const uint kbsw; +}; + +__device__ unsigned int getNodeIndex() +{ + const unsigned x = threadIdx.x; + const unsigned y = blockIdx.x; + const unsigned z = blockIdx.y; + + const unsigned nx = blockDim.x; + const unsigned ny = gridDim.x; + + return nx*(ny*z + y) + x; +} + +__device__ bool isValidFluidNode(uint k, int size_Mat, uint nodeType) +{ + return (k < size_Mat) && (nodeType == GEO_FLUID); +} + +__device__ void getLevelForce(real fx, real fy, real fz, int level, real* forces) +{ + real fx_t {1.}, fy_t {1.}, fz_t {1.}; + for (int i = 0; i < level; i++) + { + fx_t *= c2o1; + fy_t *= c2o1; + fz_t *= c2o1; + } + + forces[0] = fx / fx_t; + forces[1] = fy / fy_t; + forces[2] = fz / fz_t; +} + + +extern "C" __global__ void Cumulant_K17_LBM_Device_Kernel( + real omega, + uint* typeOfGridNode, + uint* neighborX, + uint* neighborY, + uint* neighborZ, + real* distributions, + int size_Mat, + int level, + real* forces, + bool isEvenTimestep) +{ + const uint k = getNodeIndex(); + const uint nodeType = typeOfGridNode[k]; + + if (isValidFluidNode(k, size_Mat, nodeType)) + { + DistributionWrapper distributionWrapper { + distributions, size_Mat, isEvenTimestep, k, neighborX, neighborY, neighborZ + }; + + real level_forces[3]; + getLevelForce(forces[0], forces[1], forces[2], level, level_forces); + + VF::LBM::cumulantChimera(distributionWrapper.distribution, omega, level_forces); + + distributionWrapper.write(); + } +} + + + +//////////////////////////////////////////////////////////////////////////////// +extern "C" __global__ void Cumulant_K17_LBM_Device_Kernel_old( + real omega, + uint* typeOfGridNode, + uint* neighborX, + uint* neighborY, + uint* neighborZ, + real* distributions, + int size_Mat, + int level, + real* forces, + bool isEvenTimestep) +{ + ////////////////////////////////////////////////////////////////////////// + //! Cumulant K17 Kernel is based on \ref + //! <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017), DOI:10.1016/j.jcp.2017.05.040 ]</b></a> + //! and \ref + //! <a href="https://doi.org/10.1016/j.jcp.2017.07.004"><b>[ M. Geier et al. (2017), DOI:10.1016/j.jcp.2017.07.004 ]</b></a> + //! + //! The cumulant kernel is executed in the following steps + //! + //////////////////////////////////////////////////////////////////////////////// + //! - Get node index coordinates from thredIdx, blockIdx, blockDim and gridDim. + //! + const unsigned x = threadIdx.x; + const unsigned y = blockIdx.x; + const unsigned z = blockIdx.y; + + const unsigned nx = blockDim.x; + const unsigned ny = gridDim.x; + + const unsigned k = nx*(ny*z + y) + x; + + ////////////////////////////////////////////////////////////////////////// + // run for all indices in size_Mat and fluid nodes + if ((k < size_Mat) && (typeOfGridNode[k] == GEO_FLUID)) + { + ////////////////////////////////////////////////////////////////////////// + //! - Read distributions: style of reading and writing the distributions from/to stored arrays dependent on timestep is based on the esoteric twist algorithm \ref + //! <a href="https://doi.org/10.3390/computation5020019"><b>[ M. Geier et al. (2017), DOI:10.3390/computation5020019 ]</b></a> + //! + Distributions27 dist = getDistributions27(distributions, size_Mat, isEvenTimestep); + + //////////////////////////////////////////////////////////////////////////////// + //! - Set neighbor indices (necessary for indirect addressing) + uint kw = neighborX[k]; + uint ks = neighborY[k]; + uint kb = neighborZ[k]; + uint ksw = neighborY[kw]; + uint kbw = neighborZ[kw]; + uint kbs = neighborZ[ks]; + uint kbsw = neighborZ[ksw]; + //////////////////////////////////////////////////////////////////////////////////// + //! - Set local distributions + //! + real mfcbb = (dist.f[dirE ])[k]; + real mfabb = (dist.f[dirW ])[kw]; + real mfbcb = (dist.f[dirN ])[k]; + real mfbab = (dist.f[dirS ])[ks]; + real mfbbc = (dist.f[dirT ])[k]; + real mfbba = (dist.f[dirB ])[kb]; + real mfccb = (dist.f[dirNE ])[k]; + real mfaab = (dist.f[dirSW ])[ksw]; + real mfcab = (dist.f[dirSE ])[ks]; + real mfacb = (dist.f[dirNW ])[kw]; + real mfcbc = (dist.f[dirTE ])[k]; + real mfaba = (dist.f[dirBW ])[kbw]; + real mfcba = (dist.f[dirBE ])[kb]; + real mfabc = (dist.f[dirTW ])[kw]; + real mfbcc = (dist.f[dirTN ])[k]; + real mfbaa = (dist.f[dirBS ])[kbs]; + real mfbca = (dist.f[dirBN ])[kb]; + real mfbac = (dist.f[dirTS ])[ks]; + real mfbbb = (dist.f[dirREST])[k]; + real mfccc = (dist.f[dirTNE ])[k]; + real mfaac = (dist.f[dirTSW ])[ksw]; + real mfcac = (dist.f[dirTSE ])[ks]; + real mfacc = (dist.f[dirTNW ])[kw]; + real mfcca = (dist.f[dirBNE ])[kb]; + real mfaaa = (dist.f[dirBSW ])[kbsw]; + real mfcaa = (dist.f[dirBSE ])[kbs]; + real mfaca = (dist.f[dirBNW ])[kbw]; + + //////////////////////////////////////////////////////////////////////////////////// + //! - Calculate density and velocity using pyramid summation for low round-off errors as in Eq. (J1)-(J3) \ref + //! <a href="https://doi.org/10.1016/j.camwa.2015.05.001"><b>[ M. Geier et al. (2015), DOI:10.1016/j.camwa 2015.05.001 ]</b></a> + //! + real drho = + ((((mfccc + mfaaa) + (mfaca + mfcac)) + ((mfacc + mfcaa) + (mfaac + mfcca))) + + (((mfbac + mfbca) + (mfbaa + mfbcc)) + ((mfabc + mfcba) + (mfaba + mfcbc)) + ((mfacb + mfcab) + (mfaab + mfccb))) + + ((mfabb + mfcbb) + (mfbab + mfbcb) + (mfbba + mfbbc))) + mfbbb; + real rho = c1o1 + drho; + real OOrho = c1o1 / rho; + real vvx = + ((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfcaa - mfacc) + (mfcca - mfaac))) + + (((mfcba - mfabc) + (mfcbc - mfaba)) + ((mfcab - mfacb) + (mfccb - mfaab))) + + (mfcbb - mfabb)) * OOrho; + real vvy = + ((((mfccc - mfaaa) + (mfaca - mfcac)) + ((mfacc - mfcaa) + (mfcca - mfaac))) + + (((mfbca - mfbac) + (mfbcc - mfbaa)) + ((mfacb - mfcab) + (mfccb - mfaab))) + + (mfbcb - mfbab)) * OOrho; + real vvz = + ((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfacc - mfcaa) + (mfaac - mfcca))) + + (((mfbac - mfbca) + (mfbcc - mfbaa)) + ((mfabc - mfcba) + (mfcbc - mfaba))) + + (mfbbc - mfbba)) * OOrho; + //////////////////////////////////////////////////////////////////////////////////// + //! - Add half of the acceleration (body force) to the velocity as in Eq. (42) \ref + //! <a href="https://doi.org/10.1016/j.camwa.2015.05.001"><b>[ M. Geier et al. (2015), DOI:10.1016/j.camwa 2015.05.001 ]</b></a> + //! + real fx = forces[0]; + real fy = forces[1]; + real fz = forces[2]; + + real fx_t {1.}, fy_t {1.}, fz_t {1.}; + for (int i = 0; i < level; i++) + { + fx_t *= c2o1; + fy_t *= c2o1; + fz_t *= c2o1; + } + + fx /= fx_t; + fy /= fy_t; + fz /= fz_t; + //real forces[3] {fx, fy, fz}; + + vvx += fx * c1o2; + vvy += fy * c1o2; + vvz += fz * c1o2; + //////////////////////////////////////////////////////////////////////////////////// + // calculate the square of velocities for this lattice node + real vx2 = vvx*vvx; + real vy2 = vvy*vvy; + real vz2 = vvz*vvz; + //////////////////////////////////////////////////////////////////////////////////// + //! - Set relaxation limiters for third order cumulants to default value \f$ \lambda=0.001 \f$ according to section 6 in \ref + //! <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017), DOI:10.1016/j.jcp.2017.05 040 ]</b></a> + //! + real wadjust; + real qudricLimitP = c1o100; + real qudricLimitM = c1o100; + real qudricLimitD = c1o100; + //////////////////////////////////////////////////////////////////////////////////// + //! - Chimera transform from well conditioned distributions to central moments as defined in Appendix J in \ref + //! <a href="https://doi.org/10.1016/j.camwa.2015.05.001"><b>[ M. Geier et al. (2015), DOI:10.1016/j.camwa 2015.05.001 ]</b></a> + //! see also Eq. (6)-(14) in \ref + //! <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017), DOI:10.1016/j.jcp.2017.05 040 ]</b></a> + //! + //////////////////////////////////////////////////////////////////////////////////// + // Z - Dir + VF::LBM::forwardInverseChimeraWithK(mfaaa, mfaab, mfaac, vvz, vz2, c36o1, c1o36); + VF::LBM::forwardInverseChimeraWithK(mfaba, mfabb, mfabc, vvz, vz2, c9o1, c1o9); + VF::LBM::forwardInverseChimeraWithK(mfaca, mfacb, mfacc, vvz, vz2, c36o1, c1o36); + VF::LBM::forwardInverseChimeraWithK(mfbaa, mfbab, mfbac, vvz, vz2, c9o1, c1o9); + VF::LBM::forwardInverseChimeraWithK(mfbba, mfbbb, mfbbc, vvz, vz2, c9o4, c4o9); + VF::LBM::forwardInverseChimeraWithK(mfbca, mfbcb, mfbcc, vvz, vz2, c9o1, c1o9); + VF::LBM::forwardInverseChimeraWithK(mfcaa, mfcab, mfcac, vvz, vz2, c36o1, c1o36); + VF::LBM::forwardInverseChimeraWithK(mfcba, mfcbb, mfcbc, vvz, vz2, c9o1, c1o9); + VF::LBM::forwardInverseChimeraWithK(mfcca, mfccb, mfccc, vvz, vz2, c36o1, c1o36); + //////////////////////////////////////////////////////////////////////////////////// + // Y - Dir + VF::LBM::forwardInverseChimeraWithK(mfaaa, mfaba, mfaca, vvy, vy2, c6o1, c1o6); + VF::LBM::forwardChimera( mfaab, mfabb, mfacb, vvy, vy2); + VF::LBM::forwardInverseChimeraWithK(mfaac, mfabc, mfacc, vvy, vy2, c18o1, c1o18); + VF::LBM::forwardInverseChimeraWithK(mfbaa, mfbba, mfbca, vvy, vy2, c3o2, c2o3); + VF::LBM::forwardChimera( mfbab, mfbbb, mfbcb, vvy, vy2); + VF::LBM::forwardInverseChimeraWithK(mfbac, mfbbc, mfbcc, vvy, vy2, c9o2, c2o9); + VF::LBM::forwardInverseChimeraWithK(mfcaa, mfcba, mfcca, vvy, vy2, c6o1, c1o6); + VF::LBM::forwardChimera( mfcab, mfcbb, mfccb, vvy, vy2); + VF::LBM::forwardInverseChimeraWithK(mfcac, mfcbc, mfccc, vvy, vy2, c18o1, c1o18); + //////////////////////////////////////////////////////////////////////////////////// + // X - Dir + VF::LBM::forwardInverseChimeraWithK(mfaaa, mfbaa, mfcaa, vvx, vx2, c1o1, c1o1); + VF::LBM::forwardChimera( mfaba, mfbba, mfcba, vvx, vx2); + VF::LBM::forwardInverseChimeraWithK(mfaca, mfbca, mfcca, vvx, vx2, c3o1, c1o3); + VF::LBM::forwardChimera( mfaab, mfbab, mfcab, vvx, vx2); + VF::LBM::forwardChimera( mfabb, mfbbb, mfcbb, vvx, vx2); + VF::LBM::forwardChimera( mfacb, mfbcb, mfccb, vvx, vx2); + VF::LBM::forwardInverseChimeraWithK(mfaac, mfbac, mfcac, vvx, vx2, c3o1, c1o3); + VF::LBM::forwardChimera( mfabc, mfbbc, mfcbc, vvx, vx2); + VF::LBM::forwardInverseChimeraWithK(mfacc, mfbcc, mfccc, vvx, vx2, c3o1, c1o9); + //////////////////////////////////////////////////////////////////////////////////// + //! - Setting relaxation rates for non-hydrodynamic cumulants (default values). Variable names and equations according to + //! <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017), DOI:10.1016/j.jcp.2017.05 040 ]</b></a> + //! => [NAME IN PAPER]=[NAME IN CODE]=[DEFAULT VALUE]. + //! - Trace of second order cumulants \f$ C_{200}+C_{020}+C_{002} \f$ used to adjust bulk viscosity:\f$\omega_2=OxxPyyPzz=1.0 \f$. + //! - Third order cumulants \f$ C_{120}+C_{102}, C_{210}+C_{012}, C_{201}+C_{021} \f$: \f$ \omega_3=OxyyPxzz \f$ set according to Eq. (111) with simplifications assuming \f$ \omega_2=1.0\f$. + //! - Third order cumulants \f$ C_{120}-C_{102}, C_{210}-C_{012}, C_{201}-C_{021} \f$: \f$ \omega_4 = OxyyMxzz \f$ set according to Eq. (112) with simplifications assuming \f$ \omega_2 = 1.0\f$. + //! - Third order cumulants \f$ C_{111} \f$: \f$ \omega_5 = Oxyz \f$ set according to Eq. (113) with simplifications assuming \f$ \omega_2 = 1.0\f$ (modify for different bulk viscosity). + //! - Fourth order cumulants \f$ C_{220}, C_{202}, C_{022}, C_{211}, C_{121}, C_{112} \f$: for simplification all set to the same default value \f$ \omega_6=\omega_7=\omega_8=O4=1.0 \f$. + //! - Fifth order cumulants \f$ C_{221}, C_{212}, C_{122}\f$: \f$\omega_9=O5=1.0\f$. + //! - Sixth order cumulant \f$ C_{222}\f$: \f$\omega_{10}=O6=1.0\f$. + //! + //////////////////////////////////////////////////////////// + //2. + real OxxPyyPzz = c1o1; + //////////////////////////////////////////////////////////// + //3. + real OxyyPxzz = c8o1 * (-c2o1 + omega) * ( c1o1 + c2o1*omega) / (-c8o1 - c14o1*omega + c7o1*omega*omega); + real OxyyMxzz = c8o1 * (-c2o1 + omega) * (-c7o1 + c4o1*omega) / (c56o1 - c50o1*omega + c9o1*omega*omega); + real Oxyz = c24o1 * (-c2o1 + omega) * (-c2o1 - c7o1*omega + c3o1*omega*omega) / (c48o1 + c152o1*omega - c130o1*omega*omega + c29o1*omega*omega*omega); + //////////////////////////////////////////////////////////// + //4. + real O4 = c1o1; + //////////////////////////////////////////////////////////// + //5. + real O5 = c1o1; + //////////////////////////////////////////////////////////// + //6. + real O6 = c1o1; + //////////////////////////////////////////////////////////////////////////////////// + //! - A and B: parameters for fourth order convergence of the diffusion term according to Eq. (114) and (115) + //! <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017), DOI:10.1016/j.jcp.2017.05 040 ]</b></a> + //! with simplifications assuming \f$ \omega_2 = 1.0 \f$ (modify for different bulk viscosity). + //! + real A = (c4o1 + c2o1*omega - c3o1*omega*omega) / (c2o1 - c7o1*omega + c5o1*omega*omega); + real B = (c4o1 + c28o1*omega - c14o1*omega*omega) / (c6o1 - c21o1*omega + c15o1*omega*omega); + //////////////////////////////////////////////////////////////////////////////////// + //! - Compute cumulants from central moments according to Eq. (20)-(23) in + //! <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017), DOI:10.1016/j.jcp.2017.05 040 ]</b></a> + //! + //////////////////////////////////////////////////////////// + //4. + real CUMcbb = mfcbb - ((mfcaa + c1o3) * mfabb + c2o1 * mfbba * mfbab) * OOrho; + real CUMbcb = mfbcb - ((mfaca + c1o3) * mfbab + c2o1 * mfbba * mfabb) * OOrho; + real CUMbbc = mfbbc - ((mfaac + c1o3) * mfbba + c2o1 * mfbab * mfabb) * OOrho; + real CUMcca = mfcca - (((mfcaa * mfaca + c2o1 * mfbba * mfbba) + c1o3 * (mfcaa + mfaca)) * OOrho - c1o9*(drho * OOrho)); + real CUMcac = mfcac - (((mfcaa * mfaac + c2o1 * mfbab * mfbab) + c1o3 * (mfcaa + mfaac)) * OOrho - c1o9*(drho * OOrho)); + real CUMacc = mfacc - (((mfaac * mfaca + c2o1 * mfabb * mfabb) + c1o3 * (mfaac + mfaca)) * OOrho - c1o9*(drho * OOrho)); + //////////////////////////////////////////////////////////// + //5. + real CUMbcc = mfbcc - ((mfaac * mfbca + mfaca * mfbac + c4o1 * mfabb * mfbbb + c2o1 * (mfbab * mfacb + mfbba * mfabc)) + c1o3 * (mfbca + mfbac)) * OOrho; + real CUMcbc = mfcbc - ((mfaac * mfcba + mfcaa * mfabc + c4o1 * mfbab * mfbbb + c2o1 * (mfabb * mfcab + mfbba * mfbac)) + c1o3 * (mfcba + mfabc)) * OOrho; + real CUMccb = mfccb - ((mfcaa * mfacb + mfaca * mfcab + c4o1 * mfbba * mfbbb + c2o1 * (mfbab * mfbca + mfabb * mfcba)) + c1o3 * (mfacb + mfcab)) * OOrho; + //////////////////////////////////////////////////////////// + //6. + real CUMccc = mfccc + ((-c4o1 * mfbbb * mfbbb + - (mfcaa * mfacc + mfaca * mfcac + mfaac * mfcca) + - c4o1 * (mfabb * mfcbb + mfbab * mfbcb + mfbba * mfbbc) + - c2o1 * (mfbca * mfbac + mfcba * mfabc + mfcab * mfacb)) * OOrho + + (c4o1 * (mfbab * mfbab * mfaca + mfabb * mfabb * mfcaa + mfbba * mfbba * mfaac) + + c2o1 * (mfcaa * mfaca * mfaac) + + c16o1 * mfbba * mfbab * mfabb) * OOrho * OOrho + - c1o3 * (mfacc + mfcac + mfcca) * OOrho + - c1o9 * (mfcaa + mfaca + mfaac) * OOrho + + (c2o1 * (mfbab * mfbab + mfabb * mfabb + mfbba * mfbba) + + (mfaac * mfaca + mfaac * mfcaa + mfaca * mfcaa) + c1o3 *(mfaac + mfaca + mfcaa)) * OOrho * OOrho * c2o3 + + c1o27*((drho * drho - drho) * OOrho * OOrho)); + //////////////////////////////////////////////////////////////////////////////////// + //! - Compute linear combinations of second and third order cumulants + //! + //////////////////////////////////////////////////////////// + //2. + real mxxPyyPzz = mfcaa + mfaca + mfaac; + real mxxMyy = mfcaa - mfaca; + real mxxMzz = mfcaa - mfaac; + //////////////////////////////////////////////////////////// + //3. + real mxxyPyzz = mfcba + mfabc; + real mxxyMyzz = mfcba - mfabc; + real mxxzPyyz = mfcab + mfacb; + real mxxzMyyz = mfcab - mfacb; + real mxyyPxzz = mfbca + mfbac; + real mxyyMxzz = mfbca - mfbac; + //////////////////////////////////////////////////////////////////////////////////// + //incl. correction + //////////////////////////////////////////////////////////// + //! - Compute velocity gradients from second order cumulants according to Eq. (27)-(32) + //! <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017), DOI:10.1016/j.jcp.2017.05 040 ]</b></a> + //! Further explanations of the correction in viscosity in Appendix H of + //! <a href="https://doi.org/10.1016/j.camwa.2015.05.001"><b>[ M. Geier et al. (2015), DOI:10.1016/j.camwa 2015.05.001 ]</b></a> + //! Note that the division by rho is omitted here as we need rho times the gradients later. + //! + real Dxy = -c3o1*omega*mfbba; + real Dxz = -c3o1*omega*mfbab; + real Dyz = -c3o1*omega*mfabb; + real dxux = c1o2 * (-omega) *(mxxMyy + mxxMzz) + c1o2 * OxxPyyPzz * (mfaaa - mxxPyyPzz); + real dyuy = dxux + omega * c3o2 * mxxMyy; + real dzuz = dxux + omega * c3o2 * mxxMzz; + //////////////////////////////////////////////////////////// + //! - Relaxation of second order cumulants with correction terms according to Eq. (33)-(35) in + //! <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017), DOI:10.1016/j.jcp.2017.05 040 ]</b></a> + //! + mxxPyyPzz += OxxPyyPzz*(mfaaa - mxxPyyPzz) - c3o1 * (c1o1 - c1o2 * OxxPyyPzz) * (vx2 * dxux + vy2 * dyuy + vz2 * dzuz); + mxxMyy += omega * (-mxxMyy) - c3o1 * (c1o1 + c1o2 * (-omega)) * (vx2 * dxux - vy2 * dyuy); + mxxMzz += omega * (-mxxMzz) - c3o1 * (c1o1 + c1o2 * (-omega)) * (vx2 * dxux - vz2 * dzuz); + //////////////////////////////////////////////////////////////////////////////////// + ////no correction + //mxxPyyPzz += OxxPyyPzz*(mfaaa - mxxPyyPzz); + //mxxMyy += -(-omega) * (-mxxMyy); + //mxxMzz += -(-omega) * (-mxxMzz); + ////////////////////////////////////////////////////////////////////////// + mfabb += omega * (-mfabb); + mfbab += omega * (-mfbab); + mfbba += omega * (-mfbba); + //////////////////////////////////////////////////////////////////////////////////// + //relax + ////////////////////////////////////////////////////////////////////////// + // incl. limiter + //! - Relaxation of third order cumulants including limiter according to Eq. (116)-(123) + //! <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017), DOI:10.1016/j.jcp.2017.05 040 ]</b></a> + //! + wadjust = Oxyz + (c1o1 - Oxyz)*abs(mfbbb) / (abs(mfbbb) + qudricLimitD); + mfbbb += wadjust * (-mfbbb); + wadjust = OxyyPxzz + (c1o1 - OxyyPxzz)*abs(mxxyPyzz) / (abs(mxxyPyzz) + qudricLimitP); + mxxyPyzz += wadjust * (-mxxyPyzz); + wadjust = OxyyMxzz + (c1o1 - OxyyMxzz)*abs(mxxyMyzz) / (abs(mxxyMyzz) + qudricLimitM); + mxxyMyzz += wadjust * (-mxxyMyzz); + wadjust = OxyyPxzz + (c1o1 - OxyyPxzz)*abs(mxxzPyyz) / (abs(mxxzPyyz) + qudricLimitP); + mxxzPyyz += wadjust * (-mxxzPyyz); + wadjust = OxyyMxzz + (c1o1 - OxyyMxzz)*abs(mxxzMyyz) / (abs(mxxzMyyz) + qudricLimitM); + mxxzMyyz += wadjust * (-mxxzMyyz); + wadjust = OxyyPxzz + (c1o1 - OxyyPxzz)*abs(mxyyPxzz) / (abs(mxyyPxzz) + qudricLimitP); + mxyyPxzz += wadjust * (-mxyyPxzz); + wadjust = OxyyMxzz + (c1o1 - OxyyMxzz)*abs(mxyyMxzz) / (abs(mxyyMxzz) + qudricLimitM); + mxyyMxzz += wadjust * (-mxyyMxzz); + ////////////////////////////////////////////////////////////////////////// + // no limiter + //mfbbb += OxyyMxzz * (-mfbbb); + //mxxyPyzz += OxyyPxzz * (-mxxyPyzz); + //mxxyMyzz += OxyyMxzz * (-mxxyMyzz); + //mxxzPyyz += OxyyPxzz * (-mxxzPyyz); + //mxxzMyyz += OxyyMxzz * (-mxxzMyyz); + //mxyyPxzz += OxyyPxzz * (-mxyyPxzz); + //mxyyMxzz += OxyyMxzz * (-mxyyMxzz); + //////////////////////////////////////////////////////////////////////////////////// + //! - Compute inverse linear combinations of second and third order cumulants + //! + mfcaa = c1o3 * (mxxMyy + mxxMzz + mxxPyyPzz); + mfaca = c1o3 * (-c2o1* mxxMyy + mxxMzz + mxxPyyPzz); + mfaac = c1o3 * (mxxMyy - c2o1* mxxMzz + mxxPyyPzz); + mfcba = ( mxxyMyzz + mxxyPyzz) * c1o2; + mfabc = (-mxxyMyzz + mxxyPyzz) * c1o2; + mfcab = ( mxxzMyyz + mxxzPyyz) * c1o2; + mfacb = (-mxxzMyyz + mxxzPyyz) * c1o2; + mfbca = ( mxyyMxzz + mxyyPxzz) * c1o2; + mfbac = (-mxyyMxzz + mxyyPxzz) * c1o2; + ////////////////////////////////////////////////////////////////////////// + ////////////////////////////////////////////////////////////////////////// + //4. + // no limiter + //! - Relax fourth order cumulants to modified equilibrium for fourth order convergence of diffusion according to Eq. (43)-(48) + //! <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017), DOI:10.1016/j.jcp.2017.05 040 ]</b></a> + //! + CUMacc = -O4*(c1o1 / omega - c1o2) * (dyuy + dzuz) * c2o3 * A + (c1o1 - O4) * (CUMacc); + CUMcac = -O4*(c1o1 / omega - c1o2) * (dxux + dzuz) * c2o3 * A + (c1o1 - O4) * (CUMcac); + CUMcca = -O4*(c1o1 / omega - c1o2) * (dyuy + dxux) * c2o3 * A + (c1o1 - O4) * (CUMcca); + CUMbbc = -O4*(c1o1 / omega - c1o2) * Dxy * c1o3 * B + (c1o1 - O4) * (CUMbbc); + CUMbcb = -O4*(c1o1 / omega - c1o2) * Dxz * c1o3 * B + (c1o1 - O4) * (CUMbcb); + CUMcbb = -O4*(c1o1 / omega - c1o2) * Dyz * c1o3 * B + (c1o1 - O4) * (CUMcbb); + ////////////////////////////////////////////////////////////////////////// + //5. + CUMbcc += O5 * (-CUMbcc); + CUMcbc += O5 * (-CUMcbc); + CUMccb += O5 * (-CUMccb); + ////////////////////////////////////////////////////////////////////////// + //6. + CUMccc += O6 * (-CUMccc); + //////////////////////////////////////////////////////////////////////////////////// + //! - Compute central moments from post collision cumulants according to Eq. (53)-(56) in + //! <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017), DOI:10.1016/j.jcp.2017.05 040 ]</b></a> + //! + ////////////////////////////////////////////////////////////////////////// + //4. + mfcbb = CUMcbb + c1o3*((c3o1*mfcaa + c1o1) * mfabb + c6o1 * mfbba * mfbab) * OOrho; + mfbcb = CUMbcb + c1o3*((c3o1*mfaca + c1o1) * mfbab + c6o1 * mfbba * mfabb) * OOrho; + mfbbc = CUMbbc + c1o3*((c3o1*mfaac + c1o1) * mfbba + c6o1 * mfbab * mfabb) * OOrho; + mfcca = CUMcca + (((mfcaa * mfaca + c2o1 * mfbba * mfbba)*c9o1 + c3o1 * (mfcaa + mfaca)) * OOrho - (drho * OOrho))*c1o9; + mfcac = CUMcac + (((mfcaa * mfaac + c2o1 * mfbab * mfbab)*c9o1 + c3o1 * (mfcaa + mfaac)) * OOrho - (drho * OOrho))*c1o9; + mfacc = CUMacc + (((mfaac * mfaca + c2o1 * mfabb * mfabb)*c9o1 + c3o1 * (mfaac + mfaca)) * OOrho - (drho * OOrho))*c1o9; + ////////////////////////////////////////////////////////////////////////// + //5. + mfbcc = CUMbcc + c1o3 *(c3o1*(mfaac * mfbca + mfaca * mfbac + c4o1 * mfabb * mfbbb + c2o1 * (mfbab * mfacb + mfbba * mfabc)) + (mfbca + mfbac)) * OOrho; + mfcbc = CUMcbc + c1o3 *(c3o1*(mfaac * mfcba + mfcaa * mfabc + c4o1 * mfbab * mfbbb + c2o1 * (mfabb * mfcab + mfbba * mfbac)) + (mfcba + mfabc)) * OOrho; + mfccb = CUMccb + c1o3 *(c3o1*(mfcaa * mfacb + mfaca * mfcab + c4o1 * mfbba * mfbbb + c2o1 * (mfbab * mfbca + mfabb * mfcba)) + (mfacb + mfcab)) * OOrho; + ////////////////////////////////////////////////////////////////////////// + //6. + mfccc = CUMccc - ((-c4o1 * mfbbb * mfbbb + - (mfcaa * mfacc + mfaca * mfcac + mfaac * mfcca) + - c4o1 * (mfabb * mfcbb + mfbab * mfbcb + mfbba * mfbbc) + - c2o1 * (mfbca * mfbac + mfcba * mfabc + mfcab * mfacb)) * OOrho + + (c4o1 * (mfbab * mfbab * mfaca + mfabb * mfabb * mfcaa + mfbba * mfbba * mfaac) + + c2o1 * (mfcaa * mfaca * mfaac) + + c16o1 * mfbba * mfbab * mfabb) * OOrho * OOrho + - c1o3 * (mfacc + mfcac + mfcca) * OOrho + - c1o9 * (mfcaa + mfaca + mfaac) * OOrho + + (c2o1 * (mfbab * mfbab + mfabb * mfabb + mfbba * mfbba) + + (mfaac * mfaca + mfaac * mfcaa + mfaca * mfcaa) + c1o3 *(mfaac + mfaca + mfcaa)) * OOrho * OOrho * c2o3 + + c1o27*((drho * drho - drho) * OOrho * OOrho)); + //////////////////////////////////////////////////////////////////////////////////// + //! - Add acceleration (body force) to first order cumulants according to Eq. (85)-(87) in + //! <a href="https://doi.org/10.1016/j.camwa.2015.05.001"><b>[ M. Geier et al. (2015), DOI:10.1016/j.camwa 2015.05.001 ]</b></a> + //! + mfbaa = -mfbaa; + mfaba = -mfaba; + mfaab = -mfaab; + //////////////////////////////////////////////////////////////////////////////////// + //! - Chimera transform from central moments to well conditioned distributions as defined in Appendix J in + //! <a href="https://doi.org/10.1016/j.camwa.2015.05.001"><b>[ M. Geier et al. (2015), DOI:10.1016/j.camwa 2015.05.001 ]</b></a> + //! see also Eq. (88)-(96) in + //! <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017), DOI:10.1016/j.jcp.2017.05 040 ]</b></a> + //! + //////////////////////////////////////////////////////////////////////////////////// + // X - Dir + VF::LBM::backwardInverseChimeraWithK(mfaaa, mfbaa, mfcaa, vvx, vx2, c1o1, c1o1); + VF::LBM::backwardChimera( mfaba, mfbba, mfcba, vvx, vx2); + VF::LBM::backwardInverseChimeraWithK(mfaca, mfbca, mfcca, vvx, vx2, c3o1, c1o3); + VF::LBM::backwardChimera( mfaab, mfbab, mfcab, vvx, vx2); + VF::LBM::backwardChimera( mfabb, mfbbb, mfcbb, vvx, vx2); + VF::LBM::backwardChimera( mfacb, mfbcb, mfccb, vvx, vx2); + VF::LBM::backwardInverseChimeraWithK(mfaac, mfbac, mfcac, vvx, vx2, c3o1, c1o3); + VF::LBM::backwardChimera( mfabc, mfbbc, mfcbc, vvx, vx2); + VF::LBM::backwardInverseChimeraWithK(mfacc, mfbcc, mfccc, vvx, vx2, c9o1, c1o9); + //////////////////////////////////////////////////////////////////////////////////// + // Y - Dir + VF::LBM::backwardInverseChimeraWithK(mfaaa, mfaba, mfaca, vvy, vy2, c6o1, c1o6); + VF::LBM::backwardChimera( mfaab, mfabb, mfacb, vvy, vy2); + VF::LBM::backwardInverseChimeraWithK(mfaac, mfabc, mfacc, vvy, vy2, c18o1, c1o18); + VF::LBM::backwardInverseChimeraWithK(mfbaa, mfbba, mfbca, vvy, vy2, c3o2, c2o3); + VF::LBM::backwardChimera( mfbab, mfbbb, mfbcb, vvy, vy2); + VF::LBM::backwardInverseChimeraWithK(mfbac, mfbbc, mfbcc, vvy, vy2, c9o2, c2o9); + VF::LBM::backwardInverseChimeraWithK(mfcaa, mfcba, mfcca, vvy, vy2, c6o1, c1o6); + VF::LBM::backwardChimera( mfcab, mfcbb, mfccb, vvy, vy2); + VF::LBM::backwardInverseChimeraWithK(mfcac, mfcbc, mfccc, vvy, vy2, c18o1, c1o18); + //////////////////////////////////////////////////////////////////////////////////// + // Z - Dir + VF::LBM::backwardInverseChimeraWithK(mfaaa, mfaab, mfaac, vvz, vz2, c36o1, c1o36); + VF::LBM::backwardInverseChimeraWithK(mfaba, mfabb, mfabc, vvz, vz2, c9o1, c1o9); + VF::LBM::backwardInverseChimeraWithK(mfaca, mfacb, mfacc, vvz, vz2, c36o1, c1o36); + VF::LBM::backwardInverseChimeraWithK(mfbaa, mfbab, mfbac, vvz, vz2, c9o1, c1o9); + VF::LBM::backwardInverseChimeraWithK(mfbba, mfbbb, mfbbc, vvz, vz2, c9o4, c4o9); + VF::LBM::backwardInverseChimeraWithK(mfbca, mfbcb, mfbcc, vvz, vz2, c9o1, c1o9); + VF::LBM::backwardInverseChimeraWithK(mfcaa, mfcab, mfcac, vvz, vz2, c36o1, c1o36); + VF::LBM::backwardInverseChimeraWithK(mfcba, mfcbb, mfcbc, vvz, vz2, c9o1, c1o9); + VF::LBM::backwardInverseChimeraWithK(mfcca, mfccb, mfccc, vvz, vz2, c36o1, c1o36); + + //////////////////////////////////////////////////////////////////////////////////// + //! - Write distributions: style of reading and writing the distributions from/to + //! stored arrays dependent on timestep is based on the esoteric twist algorithm + //! <a href="https://doi.org/10.3390/computation5020019"><b>[ M. Geier et al. (2017), DOI:10.3390/computation5020019 ]</b></a> + //! + (dist.f[dirE ])[k ] = mfabb; + (dist.f[dirW ])[kw ] = mfcbb; + (dist.f[dirN ])[k ] = mfbab; + (dist.f[dirS ])[ks ] = mfbcb; + (dist.f[dirT ])[k ] = mfbba; + (dist.f[dirB ])[kb ] = mfbbc; + (dist.f[dirNE ])[k ] = mfaab; + (dist.f[dirSW ])[ksw ] = mfccb; + (dist.f[dirSE ])[ks ] = mfacb; + (dist.f[dirNW ])[kw ] = mfcab; + (dist.f[dirTE ])[k ] = mfaba; + (dist.f[dirBW ])[kbw ] = mfcbc; + (dist.f[dirBE ])[kb ] = mfabc; + (dist.f[dirTW ])[kw ] = mfcba; + (dist.f[dirTN ])[k ] = mfbaa; + (dist.f[dirBS ])[kbs ] = mfbcc; + (dist.f[dirBN ])[kb ] = mfbac; + (dist.f[dirTS ])[ks ] = mfbca; + (dist.f[dirREST])[k ] = mfbbb; + (dist.f[dirTNE ])[k ] = mfaaa; + (dist.f[dirTSE ])[ks ] = mfaca; + (dist.f[dirBNE ])[kb ] = mfaac; + (dist.f[dirBSE ])[kbs ] = mfacc; + (dist.f[dirTNW ])[kw ] = mfcaa; + (dist.f[dirTSW ])[ksw ] = mfcca; + (dist.f[dirBNW ])[kbw ] = mfcac; + (dist.f[dirBSW ])[kbsw] = mfccc; + } +} diff --git a/src/lbm/CMakeLists.txt b/src/lbm/CMakeLists.txt index 21ac44f7c..6331a0fe7 100644 --- a/src/lbm/CMakeLists.txt +++ b/src/lbm/CMakeLists.txt @@ -1,5 +1,7 @@ -project(lbm LANGUAGES CXX) +project(lbm LANGUAGES CXX CUDA) vf_add_library(NAME lbm PUBLIC_LINK basics) vf_add_tests() + +set_target_properties(lbm PROPERTIES CUDA_SEPARABLE_COMPILATION ON) diff --git a/src/lbm/CumulantChimeraPreCompiled.cu b/src/lbm/CumulantChimeraPreCompiled.cu new file mode 100644 index 000000000..b0a3e6254 --- /dev/null +++ b/src/lbm/CumulantChimeraPreCompiled.cu @@ -0,0 +1,433 @@ +#include "CumulantChimeraPreCompiled.h" + +#include <cmath> + +#include <basics/Core/DataTypes.h> +#include <basics/Core/RealConstants.h> + +#include "D3Q27.h" +#include "Chimera.h" +#include "MacroscopicQuantities.h" + +namespace VF +{ +namespace LBM +{ + + +////////////////////////////////////////////////////////////////////////// +//! Cumulant K17 Kernel is based on \ref +//! <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017), DOI:10.1016/j.jcp.2017.05.040 ]</b></a> +//! and \ref +//! <a href="https://doi.org/10.1016/j.jcp.2017.07.004"><b>[ M. Geier et al. (2017), DOI:10.1016/j.jcp.2017.07.004 ]</b></a> +////////////////////////////////////////////////////////////////////////// +__host__ __device__ void cumulantChimera(Distribution27& distribution, real omega, real* forces) +{ + //////////////////////////////////////////////////////////////////////////////////// + //! - Read distributions: style of reading and writing the distributions from/to + //! stored arrays dependent on timestep is based on the esoteric twist algorithm + //! <a href="https://doi.org/10.3390/computation5020019"><b>[ M. Geier et al. (2017), DOI:10.3390/computation5020019 ]</b></a> + //! + real mfcbb = distribution.f[DIR::PZZ]; + real mfabb = distribution.f[DIR::MZZ]; + real mfbcb = distribution.f[DIR::ZPZ]; + real mfbab = distribution.f[DIR::ZMZ]; + real mfbbc = distribution.f[DIR::ZZP]; + real mfbba = distribution.f[DIR::ZZM]; + real mfccb = distribution.f[DIR::PPZ]; + real mfaab = distribution.f[DIR::MMZ]; + real mfcab = distribution.f[DIR::PMZ]; + real mfacb = distribution.f[DIR::MPZ]; + real mfcbc = distribution.f[DIR::PZP]; + real mfaba = distribution.f[DIR::MZM]; + real mfcba = distribution.f[DIR::PZM]; + real mfabc = distribution.f[DIR::MZP]; + real mfbcc = distribution.f[DIR::ZPP]; + real mfbaa = distribution.f[DIR::ZMM]; + real mfbca = distribution.f[DIR::ZPM]; + real mfbac = distribution.f[DIR::ZMP]; + real mfccc = distribution.f[DIR::PPP]; + real mfacc = distribution.f[DIR::MPP]; + real mfcac = distribution.f[DIR::PMP]; + real mfaac = distribution.f[DIR::MMP]; + real mfcca = distribution.f[DIR::PPM]; + real mfaca = distribution.f[DIR::MPM]; + real mfcaa = distribution.f[DIR::PMM]; + real mfaaa = distribution.f[DIR::MMM]; + real mfbbb = distribution.f[DIR::ZZZ]; + + //////////////////////////////////////////////////////////////////////////////////// + //! - Calculate density and velocity using pyramid summation for low round-off errors as in Eq. (J1)-(J3) \ref + //! <a href="https://doi.org/10.1016/j.camwa.2015.05.001"><b>[ M. Geier et al. (2015), DOI:10.1016/j.camwa 2015.05.001 ]</b></a> + //! + real drho = + ((((mfccc + mfaaa) + (mfaca + mfcac)) + ((mfacc + mfcaa) + (mfaac + mfcca))) + + (((mfbac + mfbca) + (mfbaa + mfbcc)) + ((mfabc + mfcba) + (mfaba + mfcbc)) + ((mfacb + mfcab) + (mfaab + mfccb))) + + ((mfabb + mfcbb) + (mfbab + mfbcb) + (mfbba + mfbbc))) + mfbbb; + real rho = c1o1 + drho; + real OOrho = c1o1 / rho; + real vvx = + ((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfcaa - mfacc) + (mfcca - mfaac))) + + (((mfcba - mfabc) + (mfcbc - mfaba)) + ((mfcab - mfacb) + (mfccb - mfaab))) + + (mfcbb - mfabb)) * OOrho; + real vvy = + ((((mfccc - mfaaa) + (mfaca - mfcac)) + ((mfacc - mfcaa) + (mfcca - mfaac))) + + (((mfbca - mfbac) + (mfbcc - mfbaa)) + ((mfacb - mfcab) + (mfccb - mfaab))) + + (mfbcb - mfbab)) * OOrho; + real vvz = + ((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfacc - mfcaa) + (mfaac - mfcca))) + + (((mfbac - mfbca) + (mfbcc - mfbaa)) + ((mfabc - mfcba) + (mfcbc - mfaba))) + + (mfbbc - mfbba)) * OOrho; + //////////////////////////////////////////////////////////////////////////////////// + //! - Add half of the acceleration (body force) to the velocity as in Eq. (42) \ref + //! <a href="https://doi.org/10.1016/j.camwa.2015.05.001"><b>[ M. Geier et al. (2015), DOI:10.1016/j.camwa 2015.05.001 ]</b></a> + //! + vvx += forces[0] * c1o2; + vvy += forces[1] * c1o2; + vvz += forces[2] * c1o2; + //////////////////////////////////////////////////////////////////////////////////// + // calculate the square of velocities for this lattice node + real vx2 = vvx*vvx; + real vy2 = vvy*vvy; + real vz2 = vvz*vvz; + //////////////////////////////////////////////////////////////////////////////////// + //! - Set relaxation limiters for third order cumulants to default value \f$ \lambda=0.001 \f$ according to section 6 in \ref + //! <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017), DOI:10.1016/j.jcp.2017.05 040 ]</b></a> + //! + real wadjust; + real qudricLimitP = c1o100; + real qudricLimitM = c1o100; + real qudricLimitD = c1o100; + //////////////////////////////////////////////////////////////////////////////////// + //! - Chimera transform from well conditioned distributions to central moments as defined in Appendix J in \ref + //! <a href="https://doi.org/10.1016/j.camwa.2015.05.001"><b>[ M. Geier et al. (2015), DOI:10.1016/j.camwa 2015.05.001 ]</b></a> + //! see also Eq. (6)-(14) in \ref + //! <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017), DOI:10.1016/j.jcp.2017.05 040 ]</b></a> + //! + //////////////////////////////////////////////////////////////////////////////////// + // Z - Dir + VF::LBM::forwardInverseChimeraWithK(mfaaa, mfaab, mfaac, vvz, vz2, c36o1, c1o36); + VF::LBM::forwardInverseChimeraWithK(mfaba, mfabb, mfabc, vvz, vz2, c9o1, c1o9); + VF::LBM::forwardInverseChimeraWithK(mfaca, mfacb, mfacc, vvz, vz2, c36o1, c1o36); + VF::LBM::forwardInverseChimeraWithK(mfbaa, mfbab, mfbac, vvz, vz2, c9o1, c1o9); + VF::LBM::forwardInverseChimeraWithK(mfbba, mfbbb, mfbbc, vvz, vz2, c9o4, c4o9); + VF::LBM::forwardInverseChimeraWithK(mfbca, mfbcb, mfbcc, vvz, vz2, c9o1, c1o9); + VF::LBM::forwardInverseChimeraWithK(mfcaa, mfcab, mfcac, vvz, vz2, c36o1, c1o36); + VF::LBM::forwardInverseChimeraWithK(mfcba, mfcbb, mfcbc, vvz, vz2, c9o1, c1o9); + VF::LBM::forwardInverseChimeraWithK(mfcca, mfccb, mfccc, vvz, vz2, c36o1, c1o36); + //////////////////////////////////////////////////////////////////////////////////// + // Y - Dir + VF::LBM::forwardInverseChimeraWithK(mfaaa, mfaba, mfaca, vvy, vy2, c6o1, c1o6); + VF::LBM::forwardChimera( mfaab, mfabb, mfacb, vvy, vy2); + VF::LBM::forwardInverseChimeraWithK(mfaac, mfabc, mfacc, vvy, vy2, c18o1, c1o18); + VF::LBM::forwardInverseChimeraWithK(mfbaa, mfbba, mfbca, vvy, vy2, c3o2, c2o3); + VF::LBM::forwardChimera( mfbab, mfbbb, mfbcb, vvy, vy2); + VF::LBM::forwardInverseChimeraWithK(mfbac, mfbbc, mfbcc, vvy, vy2, c9o2, c2o9); + VF::LBM::forwardInverseChimeraWithK(mfcaa, mfcba, mfcca, vvy, vy2, c6o1, c1o6); + VF::LBM::forwardChimera( mfcab, mfcbb, mfccb, vvy, vy2); + VF::LBM::forwardInverseChimeraWithK(mfcac, mfcbc, mfccc, vvy, vy2, c18o1, c1o18); + //////////////////////////////////////////////////////////////////////////////////// + // X - Dir + VF::LBM::forwardInverseChimeraWithK(mfaaa, mfbaa, mfcaa, vvx, vx2, c1o1, c1o1); + VF::LBM::forwardChimera( mfaba, mfbba, mfcba, vvx, vx2); + VF::LBM::forwardInverseChimeraWithK(mfaca, mfbca, mfcca, vvx, vx2, c3o1, c1o3); + VF::LBM::forwardChimera( mfaab, mfbab, mfcab, vvx, vx2); + VF::LBM::forwardChimera( mfabb, mfbbb, mfcbb, vvx, vx2); + VF::LBM::forwardChimera( mfacb, mfbcb, mfccb, vvx, vx2); + VF::LBM::forwardInverseChimeraWithK(mfaac, mfbac, mfcac, vvx, vx2, c3o1, c1o3); + VF::LBM::forwardChimera( mfabc, mfbbc, mfcbc, vvx, vx2); + VF::LBM::forwardInverseChimeraWithK(mfacc, mfbcc, mfccc, vvx, vx2, c3o1, c1o9); + //////////////////////////////////////////////////////////////////////////////////// + //! - Setting relaxation rates for non-hydrodynamic cumulants (default values). Variable names and equations according to + //! <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017), DOI:10.1016/j.jcp.2017.05 040 ]</b></a> + //! => [NAME IN PAPER]=[NAME IN CODE]=[DEFAULT VALUE]. + //! - Trace of second order cumulants \f$ C_{200}+C_{020}+C_{002} \f$ used to adjust bulk viscosity:\f$\omega_2=OxxPyyPzz=1.0 \f$. + //! - Third order cumulants \f$ C_{120}+C_{102}, C_{210}+C_{012}, C_{201}+C_{021} \f$: \f$ \omega_3=OxyyPxzz \f$ set according to Eq. (111) with simplifications assuming \f$ \omega_2=1.0\f$. + //! - Third order cumulants \f$ C_{120}-C_{102}, C_{210}-C_{012}, C_{201}-C_{021} \f$: \f$ \omega_4 = OxyyMxzz \f$ set according to Eq. (112) with simplifications assuming \f$ \omega_2 = 1.0\f$. + //! - Third order cumulants \f$ C_{111} \f$: \f$ \omega_5 = Oxyz \f$ set according to Eq. (113) with simplifications assuming \f$ \omega_2 = 1.0\f$ (modify for different bulk viscosity). + //! - Fourth order cumulants \f$ C_{220}, C_{202}, C_{022}, C_{211}, C_{121}, C_{112} \f$: for simplification all set to the same default value \f$ \omega_6=\omega_7=\omega_8=O4=1.0 \f$. + //! - Fifth order cumulants \f$ C_{221}, C_{212}, C_{122}\f$: \f$\omega_9=O5=1.0\f$. + //! - Sixth order cumulant \f$ C_{222}\f$: \f$\omega_{10}=O6=1.0\f$. + //! + //////////////////////////////////////////////////////////// + //2. + real OxxPyyPzz = c1o1; + //////////////////////////////////////////////////////////// + //3. + real OxyyPxzz = c8o1 * (-c2o1 + omega) * ( c1o1 + c2o1*omega) / (-c8o1 - c14o1*omega + c7o1*omega*omega); + real OxyyMxzz = c8o1 * (-c2o1 + omega) * (-c7o1 + c4o1*omega) / (c56o1 - c50o1*omega + c9o1*omega*omega); + real Oxyz = c24o1 * (-c2o1 + omega) * (-c2o1 - c7o1*omega + c3o1*omega*omega) / (c48o1 + c152o1*omega - c130o1*omega*omega + c29o1*omega*omega*omega); + //////////////////////////////////////////////////////////// + //4. + real O4 = c1o1; + //////////////////////////////////////////////////////////// + //5. + real O5 = c1o1; + //////////////////////////////////////////////////////////// + //6. + real O6 = c1o1; + //////////////////////////////////////////////////////////////////////////////////// + //! - A and B: parameters for fourth order convergence of the diffusion term according to Eq. (114) and (115) + //! <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017), DOI:10.1016/j.jcp.2017.05 040 ]</b></a> + //! with simplifications assuming \f$ \omega_2 = 1.0 \f$ (modify for different bulk viscosity). + //! + real A = (c4o1 + c2o1*omega - c3o1*omega*omega) / (c2o1 - c7o1*omega + c5o1*omega*omega); + real B = (c4o1 + c28o1*omega - c14o1*omega*omega) / (c6o1 - c21o1*omega + c15o1*omega*omega); + //////////////////////////////////////////////////////////////////////////////////// + //! - Compute cumulants from central moments according to Eq. (20)-(23) in + //! <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017), DOI:10.1016/j.jcp.2017.05 040 ]</b></a> + //! + //////////////////////////////////////////////////////////// + //4. + real CUMcbb = mfcbb - ((mfcaa + c1o3) * mfabb + c2o1 * mfbba * mfbab) * OOrho; + real CUMbcb = mfbcb - ((mfaca + c1o3) * mfbab + c2o1 * mfbba * mfabb) * OOrho; + real CUMbbc = mfbbc - ((mfaac + c1o3) * mfbba + c2o1 * mfbab * mfabb) * OOrho; + real CUMcca = mfcca - (((mfcaa * mfaca + c2o1 * mfbba * mfbba) + c1o3 * (mfcaa + mfaca)) * OOrho - c1o9*(drho * OOrho)); + real CUMcac = mfcac - (((mfcaa * mfaac + c2o1 * mfbab * mfbab) + c1o3 * (mfcaa + mfaac)) * OOrho - c1o9*(drho * OOrho)); + real CUMacc = mfacc - (((mfaac * mfaca + c2o1 * mfabb * mfabb) + c1o3 * (mfaac + mfaca)) * OOrho - c1o9*(drho * OOrho)); + //////////////////////////////////////////////////////////// + //5. + real CUMbcc = mfbcc - ((mfaac * mfbca + mfaca * mfbac + c4o1 * mfabb * mfbbb + c2o1 * (mfbab * mfacb + mfbba * mfabc)) + c1o3 * (mfbca + mfbac)) * OOrho; + real CUMcbc = mfcbc - ((mfaac * mfcba + mfcaa * mfabc + c4o1 * mfbab * mfbbb + c2o1 * (mfabb * mfcab + mfbba * mfbac)) + c1o3 * (mfcba + mfabc)) * OOrho; + real CUMccb = mfccb - ((mfcaa * mfacb + mfaca * mfcab + c4o1 * mfbba * mfbbb + c2o1 * (mfbab * mfbca + mfabb * mfcba)) + c1o3 * (mfacb + mfcab)) * OOrho; + //////////////////////////////////////////////////////////// + //6. + real CUMccc = mfccc + ((-c4o1 * mfbbb * mfbbb + - (mfcaa * mfacc + mfaca * mfcac + mfaac * mfcca) + - c4o1 * (mfabb * mfcbb + mfbab * mfbcb + mfbba * mfbbc) + - c2o1 * (mfbca * mfbac + mfcba * mfabc + mfcab * mfacb)) * OOrho + + (c4o1 * (mfbab * mfbab * mfaca + mfabb * mfabb * mfcaa + mfbba * mfbba * mfaac) + + c2o1 * (mfcaa * mfaca * mfaac) + + c16o1 * mfbba * mfbab * mfabb) * OOrho * OOrho + - c1o3 * (mfacc + mfcac + mfcca) * OOrho + - c1o9 * (mfcaa + mfaca + mfaac) * OOrho + + (c2o1 * (mfbab * mfbab + mfabb * mfabb + mfbba * mfbba) + + (mfaac * mfaca + mfaac * mfcaa + mfaca * mfcaa) + c1o3 *(mfaac + mfaca + mfcaa)) * OOrho * OOrho * c2o3 + + c1o27*((drho * drho - drho) * OOrho * OOrho)); + //////////////////////////////////////////////////////////////////////////////////// + //! - Compute linear combinations of second and third order cumulants + //! + //////////////////////////////////////////////////////////// + //2. + real mxxPyyPzz = mfcaa + mfaca + mfaac; + real mxxMyy = mfcaa - mfaca; + real mxxMzz = mfcaa - mfaac; + //////////////////////////////////////////////////////////// + //3. + real mxxyPyzz = mfcba + mfabc; + real mxxyMyzz = mfcba - mfabc; + real mxxzPyyz = mfcab + mfacb; + real mxxzMyyz = mfcab - mfacb; + real mxyyPxzz = mfbca + mfbac; + real mxyyMxzz = mfbca - mfbac; + //////////////////////////////////////////////////////////////////////////////////// + //incl. correction + //////////////////////////////////////////////////////////// + //! - Compute velocity gradients from second order cumulants according to Eq. (27)-(32) + //! <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017), DOI:10.1016/j.jcp.2017.05 040 ]</b></a> + //! Further explanations of the correction in viscosity in Appendix H of + //! <a href="https://doi.org/10.1016/j.camwa.2015.05.001"><b>[ M. Geier et al. (2015), DOI:10.1016/j.camwa 2015.05.001 ]</b></a> + //! Note that the division by rho is omitted here as we need rho times the gradients later. + //! + real Dxy = -c3o1*omega*mfbba; + real Dxz = -c3o1*omega*mfbab; + real Dyz = -c3o1*omega*mfabb; + real dxux = c1o2 * (-omega) *(mxxMyy + mxxMzz) + c1o2 * OxxPyyPzz * (mfaaa - mxxPyyPzz); + real dyuy = dxux + omega * c3o2 * mxxMyy; + real dzuz = dxux + omega * c3o2 * mxxMzz; + //////////////////////////////////////////////////////////// + //! - Relaxation of second order cumulants with correction terms according to Eq. (33)-(35) in + //! <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017), DOI:10.1016/j.jcp.2017.05 040 ]</b></a> + //! + mxxPyyPzz += OxxPyyPzz*(mfaaa - mxxPyyPzz) - c3o1 * (c1o1 - c1o2 * OxxPyyPzz) * (vx2 * dxux + vy2 * dyuy + vz2 * dzuz); + mxxMyy += omega * (-mxxMyy) - c3o1 * (c1o1 + c1o2 * (-omega)) * (vx2 * dxux - vy2 * dyuy); + mxxMzz += omega * (-mxxMzz) - c3o1 * (c1o1 + c1o2 * (-omega)) * (vx2 * dxux - vz2 * dzuz); + //////////////////////////////////////////////////////////////////////////////////// + ////no correction + //mxxPyyPzz += OxxPyyPzz*(mfaaa - mxxPyyPzz); + //mxxMyy += -(-omega) * (-mxxMyy); + //mxxMzz += -(-omega) * (-mxxMzz); + ////////////////////////////////////////////////////////////////////////// + mfabb += omega * (-mfabb); + mfbab += omega * (-mfbab); + mfbba += omega * (-mfbba); + //////////////////////////////////////////////////////////////////////////////////// + //relax + ////////////////////////////////////////////////////////////////////////// + // incl. limiter + //! - Relaxation of third order cumulants including limiter according to Eq. (116)-(123) + //! <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017), DOI:10.1016/j.jcp.2017.05 040 ]</b></a> + //! + wadjust = Oxyz + (c1o1 - Oxyz)*abs_internal(mfbbb) / (abs_internal(mfbbb) + qudricLimitD); + mfbbb += wadjust * (-mfbbb); + wadjust = OxyyPxzz + (c1o1 - OxyyPxzz)*abs_internal(mxxyPyzz) / (abs_internal(mxxyPyzz) + qudricLimitP); + mxxyPyzz += wadjust * (-mxxyPyzz); + wadjust = OxyyMxzz + (c1o1 - OxyyMxzz)*abs_internal(mxxyMyzz) / (abs_internal(mxxyMyzz) + qudricLimitM); + mxxyMyzz += wadjust * (-mxxyMyzz); + wadjust = OxyyPxzz + (c1o1 - OxyyPxzz)*abs_internal(mxxzPyyz) / (abs_internal(mxxzPyyz) + qudricLimitP); + mxxzPyyz += wadjust * (-mxxzPyyz); + wadjust = OxyyMxzz + (c1o1 - OxyyMxzz)*abs_internal(mxxzMyyz) / (abs_internal(mxxzMyyz) + qudricLimitM); + mxxzMyyz += wadjust * (-mxxzMyyz); + wadjust = OxyyPxzz + (c1o1 - OxyyPxzz)*abs_internal(mxyyPxzz) / (abs_internal(mxyyPxzz) + qudricLimitP); + mxyyPxzz += wadjust * (-mxyyPxzz); + wadjust = OxyyMxzz + (c1o1 - OxyyMxzz)*abs_internal(mxyyMxzz) / (abs_internal(mxyyMxzz) + qudricLimitM); + mxyyMxzz += wadjust * (-mxyyMxzz); + ////////////////////////////////////////////////////////////////////////// + // no limiter + //mfbbb += OxyyMxzz * (-mfbbb); + //mxxyPyzz += OxyyPxzz * (-mxxyPyzz); + //mxxyMyzz += OxyyMxzz * (-mxxyMyzz); + //mxxzPyyz += OxyyPxzz * (-mxxzPyyz); + //mxxzMyyz += OxyyMxzz * (-mxxzMyyz); + //mxyyPxzz += OxyyPxzz * (-mxyyPxzz); + //mxyyMxzz += OxyyMxzz * (-mxyyMxzz); + //////////////////////////////////////////////////////////////////////////////////// + //! - Compute inverse linear combinations of second and third order cumulants + //! + mfcaa = c1o3 * (mxxMyy + mxxMzz + mxxPyyPzz); + mfaca = c1o3 * (-c2o1* mxxMyy + mxxMzz + mxxPyyPzz); + mfaac = c1o3 * (mxxMyy - c2o1* mxxMzz + mxxPyyPzz); + mfcba = ( mxxyMyzz + mxxyPyzz) * c1o2; + mfabc = (-mxxyMyzz + mxxyPyzz) * c1o2; + mfcab = ( mxxzMyyz + mxxzPyyz) * c1o2; + mfacb = (-mxxzMyyz + mxxzPyyz) * c1o2; + mfbca = ( mxyyMxzz + mxyyPxzz) * c1o2; + mfbac = (-mxyyMxzz + mxyyPxzz) * c1o2; + ////////////////////////////////////////////////////////////////////////// + ////////////////////////////////////////////////////////////////////////// + //4. + // no limiter + //! - Relax fourth order cumulants to modified equilibrium for fourth order convergence of diffusion according to Eq. (43)-(48) + //! <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017), DOI:10.1016/j.jcp.2017.05 040 ]</b></a> + //! + CUMacc = -O4*(c1o1 / omega - c1o2) * (dyuy + dzuz) * c2o3 * A + (c1o1 - O4) * (CUMacc); + CUMcac = -O4*(c1o1 / omega - c1o2) * (dxux + dzuz) * c2o3 * A + (c1o1 - O4) * (CUMcac); + CUMcca = -O4*(c1o1 / omega - c1o2) * (dyuy + dxux) * c2o3 * A + (c1o1 - O4) * (CUMcca); + CUMbbc = -O4*(c1o1 / omega - c1o2) * Dxy * c1o3 * B + (c1o1 - O4) * (CUMbbc); + CUMbcb = -O4*(c1o1 / omega - c1o2) * Dxz * c1o3 * B + (c1o1 - O4) * (CUMbcb); + CUMcbb = -O4*(c1o1 / omega - c1o2) * Dyz * c1o3 * B + (c1o1 - O4) * (CUMcbb); + ////////////////////////////////////////////////////////////////////////// + //5. + CUMbcc += O5 * (-CUMbcc); + CUMcbc += O5 * (-CUMcbc); + CUMccb += O5 * (-CUMccb); + ////////////////////////////////////////////////////////////////////////// + //6. + CUMccc += O6 * (-CUMccc); + //////////////////////////////////////////////////////////////////////////////////// + //! - Compute central moments from post collision cumulants according to Eq. (53)-(56) in + //! <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017), DOI:10.1016/j.jcp.2017.05 040 ]</b></a> + //! + ////////////////////////////////////////////////////////////////////////// + //4. + mfcbb = CUMcbb + c1o3*((c3o1*mfcaa + c1o1) * mfabb + c6o1 * mfbba * mfbab) * OOrho; + mfbcb = CUMbcb + c1o3*((c3o1*mfaca + c1o1) * mfbab + c6o1 * mfbba * mfabb) * OOrho; + mfbbc = CUMbbc + c1o3*((c3o1*mfaac + c1o1) * mfbba + c6o1 * mfbab * mfabb) * OOrho; + mfcca = CUMcca + (((mfcaa * mfaca + c2o1 * mfbba * mfbba)*c9o1 + c3o1 * (mfcaa + mfaca)) * OOrho - (drho * OOrho))*c1o9; + mfcac = CUMcac + (((mfcaa * mfaac + c2o1 * mfbab * mfbab)*c9o1 + c3o1 * (mfcaa + mfaac)) * OOrho - (drho * OOrho))*c1o9; + mfacc = CUMacc + (((mfaac * mfaca + c2o1 * mfabb * mfabb)*c9o1 + c3o1 * (mfaac + mfaca)) * OOrho - (drho * OOrho))*c1o9; + ////////////////////////////////////////////////////////////////////////// + //5. + mfbcc = CUMbcc + c1o3 *(c3o1*(mfaac * mfbca + mfaca * mfbac + c4o1 * mfabb * mfbbb + c2o1 * (mfbab * mfacb + mfbba * mfabc)) + (mfbca + mfbac)) * OOrho; + mfcbc = CUMcbc + c1o3 *(c3o1*(mfaac * mfcba + mfcaa * mfabc + c4o1 * mfbab * mfbbb + c2o1 * (mfabb * mfcab + mfbba * mfbac)) + (mfcba + mfabc)) * OOrho; + mfccb = CUMccb + c1o3 *(c3o1*(mfcaa * mfacb + mfaca * mfcab + c4o1 * mfbba * mfbbb + c2o1 * (mfbab * mfbca + mfabb * mfcba)) + (mfacb + mfcab)) * OOrho; + ////////////////////////////////////////////////////////////////////////// + //6. + mfccc = CUMccc - ((-c4o1 * mfbbb * mfbbb + - (mfcaa * mfacc + mfaca * mfcac + mfaac * mfcca) + - c4o1 * (mfabb * mfcbb + mfbab * mfbcb + mfbba * mfbbc) + - c2o1 * (mfbca * mfbac + mfcba * mfabc + mfcab * mfacb)) * OOrho + + (c4o1 * (mfbab * mfbab * mfaca + mfabb * mfabb * mfcaa + mfbba * mfbba * mfaac) + + c2o1 * (mfcaa * mfaca * mfaac) + + c16o1 * mfbba * mfbab * mfabb) * OOrho * OOrho + - c1o3 * (mfacc + mfcac + mfcca) * OOrho + - c1o9 * (mfcaa + mfaca + mfaac) * OOrho + + (c2o1 * (mfbab * mfbab + mfabb * mfabb + mfbba * mfbba) + + (mfaac * mfaca + mfaac * mfcaa + mfaca * mfcaa) + c1o3 *(mfaac + mfaca + mfcaa)) * OOrho * OOrho * c2o3 + + c1o27*((drho * drho - drho) * OOrho * OOrho)); + //////////////////////////////////////////////////////////////////////////////////// + //! - Add acceleration (body force) to first order cumulants according to Eq. (85)-(87) in + //! <a href="https://doi.org/10.1016/j.camwa.2015.05.001"><b>[ M. Geier et al. (2015), DOI:10.1016/j.camwa 2015.05.001 ]</b></a> + //! + mfbaa = -mfbaa; + mfaba = -mfaba; + mfaab = -mfaab; + //////////////////////////////////////////////////////////////////////////////////// + //! - Chimera transform from central moments to well conditioned distributions as defined in Appendix J in + //! <a href="https://doi.org/10.1016/j.camwa.2015.05.001"><b>[ M. Geier et al. (2015), DOI:10.1016/j.camwa 2015.05.001 ]</b></a> + //! see also Eq. (88)-(96) in + //! <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017), DOI:10.1016/j.jcp.2017.05 040 ]</b></a> + //! + //////////////////////////////////////////////////////////////////////////////////// + // X - Dir + VF::LBM::backwardInverseChimeraWithK(mfaaa, mfbaa, mfcaa, vvx, vx2, c1o1, c1o1); + VF::LBM::backwardChimera( mfaba, mfbba, mfcba, vvx, vx2); + VF::LBM::backwardInverseChimeraWithK(mfaca, mfbca, mfcca, vvx, vx2, c3o1, c1o3); + VF::LBM::backwardChimera( mfaab, mfbab, mfcab, vvx, vx2); + VF::LBM::backwardChimera( mfabb, mfbbb, mfcbb, vvx, vx2); + VF::LBM::backwardChimera( mfacb, mfbcb, mfccb, vvx, vx2); + VF::LBM::backwardInverseChimeraWithK(mfaac, mfbac, mfcac, vvx, vx2, c3o1, c1o3); + VF::LBM::backwardChimera( mfabc, mfbbc, mfcbc, vvx, vx2); + VF::LBM::backwardInverseChimeraWithK(mfacc, mfbcc, mfccc, vvx, vx2, c9o1, c1o9); + //////////////////////////////////////////////////////////////////////////////////// + // Y - Dir + VF::LBM::backwardInverseChimeraWithK(mfaaa, mfaba, mfaca, vvy, vy2, c6o1, c1o6); + VF::LBM::backwardChimera( mfaab, mfabb, mfacb, vvy, vy2); + VF::LBM::backwardInverseChimeraWithK(mfaac, mfabc, mfacc, vvy, vy2, c18o1, c1o18); + VF::LBM::backwardInverseChimeraWithK(mfbaa, mfbba, mfbca, vvy, vy2, c3o2, c2o3); + VF::LBM::backwardChimera( mfbab, mfbbb, mfbcb, vvy, vy2); + VF::LBM::backwardInverseChimeraWithK(mfbac, mfbbc, mfbcc, vvy, vy2, c9o2, c2o9); + VF::LBM::backwardInverseChimeraWithK(mfcaa, mfcba, mfcca, vvy, vy2, c6o1, c1o6); + VF::LBM::backwardChimera( mfcab, mfcbb, mfccb, vvy, vy2); + VF::LBM::backwardInverseChimeraWithK(mfcac, mfcbc, mfccc, vvy, vy2, c18o1, c1o18); + //////////////////////////////////////////////////////////////////////////////////// + // Z - Dir + VF::LBM::backwardInverseChimeraWithK(mfaaa, mfaab, mfaac, vvz, vz2, c36o1, c1o36); + VF::LBM::backwardInverseChimeraWithK(mfaba, mfabb, mfabc, vvz, vz2, c9o1, c1o9); + VF::LBM::backwardInverseChimeraWithK(mfaca, mfacb, mfacc, vvz, vz2, c36o1, c1o36); + VF::LBM::backwardInverseChimeraWithK(mfbaa, mfbab, mfbac, vvz, vz2, c9o1, c1o9); + VF::LBM::backwardInverseChimeraWithK(mfbba, mfbbb, mfbbc, vvz, vz2, c9o4, c4o9); + VF::LBM::backwardInverseChimeraWithK(mfbca, mfbcb, mfbcc, vvz, vz2, c9o1, c1o9); + VF::LBM::backwardInverseChimeraWithK(mfcaa, mfcab, mfcac, vvz, vz2, c36o1, c1o36); + VF::LBM::backwardInverseChimeraWithK(mfcba, mfcbb, mfcbc, vvz, vz2, c9o1, c1o9); + VF::LBM::backwardInverseChimeraWithK(mfcca, mfccb, mfccc, vvz, vz2, c36o1, c1o36); + + + //////////////////////////////////////////////////////////////////////////////////// + //! - Write distributions: style of reading and writing the distributions from/to + //! stored arrays dependent on timestep is based on the esoteric twist algorithm + //! <a href="https://doi.org/10.3390/computation5020019"><b>[ M. Geier et al. (2017), DOI:10.3390/computation5020019 ]</b></a> + //! + distribution.f[VF::LBM::DIR::MZZ] = mfcbb; + distribution.f[VF::LBM::DIR::PZZ] = mfabb; + distribution.f[VF::LBM::DIR::ZMZ] = mfbcb; + distribution.f[VF::LBM::DIR::ZPZ] = mfbab; + distribution.f[VF::LBM::DIR::ZZM] = mfbbc; + distribution.f[VF::LBM::DIR::ZZP] = mfbba; + distribution.f[VF::LBM::DIR::MMZ] = mfccb; + distribution.f[VF::LBM::DIR::PPZ] = mfaab; + distribution.f[VF::LBM::DIR::MPZ] = mfcab; + distribution.f[VF::LBM::DIR::PMZ] = mfacb; + distribution.f[VF::LBM::DIR::MZM] = mfcbc; + distribution.f[VF::LBM::DIR::PZP] = mfaba; + distribution.f[VF::LBM::DIR::MZP] = mfcba; + distribution.f[VF::LBM::DIR::PZM] = mfabc; + distribution.f[VF::LBM::DIR::ZMM] = mfbcc; + distribution.f[VF::LBM::DIR::ZPP] = mfbaa; + distribution.f[VF::LBM::DIR::ZMP] = mfbca; + distribution.f[VF::LBM::DIR::ZPM] = mfbac; + distribution.f[VF::LBM::DIR::MMM] = mfccc; + distribution.f[VF::LBM::DIR::PMM] = mfacc; + distribution.f[VF::LBM::DIR::MPM] = mfcac; + distribution.f[VF::LBM::DIR::PPM] = mfaac; + distribution.f[VF::LBM::DIR::MMP] = mfcca; + distribution.f[VF::LBM::DIR::PMP] = mfaca; + distribution.f[VF::LBM::DIR::MPP] = mfcaa; + distribution.f[VF::LBM::DIR::PPP] = mfaaa; + distribution.f[VF::LBM::DIR::ZZZ] = mfbbb; +} + + +} +} + diff --git a/src/lbm/CumulantChimeraPreCompiled.h b/src/lbm/CumulantChimeraPreCompiled.h new file mode 100644 index 000000000..f76e14ac9 --- /dev/null +++ b/src/lbm/CumulantChimeraPreCompiled.h @@ -0,0 +1,59 @@ +#ifndef LBM_CUMULANT_CHIMERA_PRE_H +#define LBM_CUMULANT_CHIMERA_PRE_H + +#ifndef __host__ +#define __host__ +#endif +#ifndef __device__ +#define __device__ +#endif + +#include <cmath> + +#include <basics/Core/DataTypes.h> +#include <basics/Core/RealConstants.h> + +#include "D3Q27.h" +#include "Chimera.h" +#include "MacroscopicQuantities.h" + +namespace VF +{ +namespace LBM +{ + +struct Distribution27 +{ + real f[27]; + + inline __host__ __device__ real getDensity_() const + { + return getDensity(f); + } +}; + + + + +inline __host__ __device__ real abs_internal(real value) +{ +#ifdef __CUDA_ARCH__ + return ::abs(value); +#else + return std::abs(value); +#endif +} + + + +////////////////////////////////////////////////////////////////////////// +//! Cumulant K17 Kernel is based on \ref +//! <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017), DOI:10.1016/j.jcp.2017.05.040 ]</b></a> +//! and \ref +//! <a href="https://doi.org/10.1016/j.jcp.2017.07.004"><b>[ M. Geier et al. (2017), DOI:10.1016/j.jcp.2017.07.004 ]</b></a> +////////////////////////////////////////////////////////////////////////// +__host__ __device__ void cumulantChimera(Distribution27& distribution, real omega, real* forces); + +} +} +#endif -- GitLab