diff --git a/src/gpu/GridGenerator/CMakeLists.txt b/src/gpu/GridGenerator/CMakeLists.txt index 84dd965d774a84f4e8458529dace0e054f8df010..844e933d5c053aaeef38085c5c5a9e01721ff6aa 100644 --- a/src/gpu/GridGenerator/CMakeLists.txt +++ b/src/gpu/GridGenerator/CMakeLists.txt @@ -6,11 +6,5 @@ vf_add_library(PRIVATE_LINK basics OpenMP::OpenMP_CXX) vf_get_library_name(library_name) set_target_properties(${library_name} PROPERTIES CUDA_SEPARABLE_COMPILATION ON) -# according to linker error when building static libraries. -# https://stackoverflow.com/questions/50033435/cmake-cuda-separate-compilation-static-lib-link-error-on-windows-but-not-on-ubun -if (NOT BUILD_SHARED_LIBRARY) - #set_target_properties(${library_name} PROPERTIES CUDA_RESOLVE_DEVICE_SYMBOLS ON) -endif() - # we want to suppress all cuda warnings so far for this library. target_compile_options(${library_name} PUBLIC $<$<COMPILE_LANGUAGE:CUDA>:-Xcudafe "-w" >) diff --git a/src/gpu/VirtualFluids_GPU/CMakeLists.txt b/src/gpu/VirtualFluids_GPU/CMakeLists.txt index d81997df1c1969fcb4c4e097e70e422a8ab627d5..e580cff42c5ee5db3c7a090e62e508c9fd54f14f 100644 --- a/src/gpu/VirtualFluids_GPU/CMakeLists.txt +++ b/src/gpu/VirtualFluids_GPU/CMakeLists.txt @@ -5,7 +5,7 @@ if(MSVC) set(additional_libraries ws2_32 Traffic) # ws_32 throws an error on Phoenix endif() -vf_add_library(PRIVATE_LINK ${additional_libraries} GridGenerator basics MPI::MPI_CXX lbmCuda) +vf_add_library(PUBLIC_LINK basics PRIVATE_LINK ${additional_libraries} GridGenerator MPI::MPI_CXX lbmCuda) #SET(TPN_WIN32 "/EHsc") @@ -14,7 +14,4 @@ vf_add_library(PRIVATE_LINK ${additional_libraries} GridGenerator basics MPI::MP set_target_properties(VirtualFluids_GPU PROPERTIES CUDA_SEPARABLE_COMPILATION ON) -#target_compile_options(VirtualFluids_GPU PRIVATE $<$<COMPILE_LANGUAGE:CUDA>:-dlto>) - -#target_link_options(VirtualFluids_GPU PRIVATE $<DEVICE_LINK:-dlto>) vf_add_tests() diff --git a/src/gpu/VirtualFluids_GPU/GPU/CalcMac27.cu b/src/gpu/VirtualFluids_GPU/GPU/CalcMac27.cu index 138495f6e9274b3b517775d61494810ee1a00cb4..562140eb24bad470604b5782a816a4e60d5000f3 100644 --- a/src/gpu/VirtualFluids_GPU/GPU/CalcMac27.cu +++ b/src/gpu/VirtualFluids_GPU/GPU/CalcMac27.cu @@ -14,176 +14,8 @@ using namespace vf::lbm::constant; #include "lbm/MacroscopicQuantities.h" +#include "../Kernel/Utilities/DistributionHelper.cuh" -__device__ Distributions27 getDistributions(real* DD, unsigned int size_Mat, bool evenOrOdd) -{ - Distributions27 D; - if (evenOrOdd) - { - D.f[dirE ] = &DD[dirE *size_Mat]; - D.f[dirW ] = &DD[dirW *size_Mat]; - D.f[dirN ] = &DD[dirN *size_Mat]; - D.f[dirS ] = &DD[dirS *size_Mat]; - D.f[dirT ] = &DD[dirT *size_Mat]; - D.f[dirB ] = &DD[dirB *size_Mat]; - D.f[dirNE ] = &DD[dirNE *size_Mat]; - D.f[dirSW ] = &DD[dirSW *size_Mat]; - D.f[dirSE ] = &DD[dirSE *size_Mat]; - D.f[dirNW ] = &DD[dirNW *size_Mat]; - D.f[dirTE ] = &DD[dirTE *size_Mat]; - D.f[dirBW ] = &DD[dirBW *size_Mat]; - D.f[dirBE ] = &DD[dirBE *size_Mat]; - D.f[dirTW ] = &DD[dirTW *size_Mat]; - D.f[dirTN ] = &DD[dirTN *size_Mat]; - D.f[dirBS ] = &DD[dirBS *size_Mat]; - D.f[dirBN ] = &DD[dirBN *size_Mat]; - D.f[dirTS ] = &DD[dirTS *size_Mat]; - D.f[dirZERO] = &DD[dirZERO*size_Mat]; - D.f[dirTNE ] = &DD[dirTNE *size_Mat]; - D.f[dirTSW ] = &DD[dirTSW *size_Mat]; - D.f[dirTSE ] = &DD[dirTSE *size_Mat]; - D.f[dirTNW ] = &DD[dirTNW *size_Mat]; - D.f[dirBNE ] = &DD[dirBNE *size_Mat]; - D.f[dirBSW ] = &DD[dirBSW *size_Mat]; - D.f[dirBSE ] = &DD[dirBSE *size_Mat]; - D.f[dirBNW ] = &DD[dirBNW *size_Mat]; - } - else - { - D.f[dirW ] = &DD[dirE *size_Mat]; - D.f[dirE ] = &DD[dirW *size_Mat]; - D.f[dirS ] = &DD[dirN *size_Mat]; - D.f[dirN ] = &DD[dirS *size_Mat]; - D.f[dirB ] = &DD[dirT *size_Mat]; - D.f[dirT ] = &DD[dirB *size_Mat]; - D.f[dirSW ] = &DD[dirNE *size_Mat]; - D.f[dirNE ] = &DD[dirSW *size_Mat]; - D.f[dirNW ] = &DD[dirSE *size_Mat]; - D.f[dirSE ] = &DD[dirNW *size_Mat]; - D.f[dirBW ] = &DD[dirTE *size_Mat]; - D.f[dirTE ] = &DD[dirBW *size_Mat]; - D.f[dirTW ] = &DD[dirBE *size_Mat]; - D.f[dirBE ] = &DD[dirTW *size_Mat]; - D.f[dirBS ] = &DD[dirTN *size_Mat]; - D.f[dirTN ] = &DD[dirBS *size_Mat]; - D.f[dirTS ] = &DD[dirBN *size_Mat]; - D.f[dirBN ] = &DD[dirTS *size_Mat]; - D.f[dirZERO] = &DD[dirZERO*size_Mat]; - D.f[dirTNE ] = &DD[dirBSW *size_Mat]; - D.f[dirTSW ] = &DD[dirBNE *size_Mat]; - D.f[dirTSE ] = &DD[dirBNW *size_Mat]; - D.f[dirTNW ] = &DD[dirBSE *size_Mat]; - D.f[dirBNE ] = &DD[dirTSW *size_Mat]; - D.f[dirBSW ] = &DD[dirTNE *size_Mat]; - D.f[dirBSE ] = &DD[dirTNW *size_Mat]; - D.f[dirBNW ] = &DD[dirTSE *size_Mat]; - } - return D; -} - -struct Distribution27 -{ - real f[27]; -}; - -__device__ Distribution27 getDistribution(real* DD, unsigned int size_Mat, bool evenOrOdd, unsigned int k, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ) -{ - unsigned int kzero= k; - unsigned int ke = k; - unsigned int kw = neighborX[k]; - unsigned int kn = k; - unsigned int ks = neighborY[k]; - unsigned int kt = k; - unsigned int kb = neighborZ[k]; - unsigned int ksw = neighborY[kw]; - unsigned int kne = k; - unsigned int kse = ks; - unsigned int knw = kw; - unsigned int kbw = neighborZ[kw]; - unsigned int kte = k; - unsigned int kbe = kb; - unsigned int ktw = kw; - unsigned int kbs = neighborZ[ks]; - unsigned int ktn = k; - unsigned int kbn = kb; - unsigned int kts = ks; - unsigned int ktse = ks; - unsigned int kbnw = kbw; - unsigned int ktnw = kw; - unsigned int kbse = kbs; - unsigned int ktsw = ksw; - unsigned int kbne = kb; - unsigned int ktne = k; - unsigned int kbsw = neighborZ[ksw]; - - Distributions27 D = getDistributions(DD, size_Mat, evenOrOdd); - -// real f_dirE = (D.f[dirE])[ke]; -// real f_dirW = (D.f[dirW])[kw]; -// real f_dirN = (D.f[dirN])[kn]; -// real f_dirS = (D.f[dirS])[ks]; -// real f_dirT = (D.f[dirT])[kt]; -// real f_dirB = (D.f[dirB])[kb]; -// -// real f_dirNE = (D.f[dirNE])[kne]; -// real f_dirSW = (D.f[dirSW])[ksw]; -// real f_dirSE = (D.f[dirSE])[kse]; -// real f_dirNW = (D.f[dirNW])[knw]; -// real f_dirTE = (D.f[dirTE])[kte]; -// real f_dirBW = (D.f[dirBW])[kbw]; -// -// real f_dirBE = (D.f[dirBE])[kbe]; -// real f_dirTW = (D.f[dirTW])[ktw]; -// real f_dirTN = (D.f[dirTN])[ktn]; -// real f_dirBS = (D.f[dirBS])[kbs]; -// real f_dirBN = (D.f[dirBN])[kbn]; -// real f_dirTS = (D.f[dirTS])[kts]; -// -// real f_dirZERO = (D.f[dirZERO])[kzero]; -// -// real f_dirTNE = (D.f[dirTNE ])[ktne]; -// real f_dirTSW = (D.f[dirTSW ])[ktsw]; -// real f_dirTSE = (D.f[dirTSE ])[ktse]; -// real f_dirTNW = (D.f[dirTNW ])[ktnw]; -// real f_dirBNE = (D.f[dirBNE ])[kbne]; -// real f_dirBSW = (D.f[dirBSW ])[kbsw]; -// real f_dirBSE = (D.f[dirBSE ])[kbse]; -// real f_dirBNW = (D.f[dirBNW ])[kbnw]; - - -return { - (D.f[dirE])[ke], - (D.f[dirW])[kw], - (D.f[dirN])[kn], - (D.f[dirS])[ks], - (D.f[dirT])[kt], - (D.f[dirB])[kb], - (D.f[dirNE])[kne], - (D.f[dirSW])[ksw], - (D.f[dirSE])[kse], - (D.f[dirNW])[knw], - (D.f[dirTE])[kte], - (D.f[dirBW])[kbw], - (D.f[dirBE])[kbe], - (D.f[dirTW])[ktw], - (D.f[dirTN])[ktn], - (D.f[dirBS])[kbs], - (D.f[dirBN])[kbn], - (D.f[dirTS])[kts], - (D.f[dirZERO])[kzero], - (D.f[dirTNE ])[ktne], - (D.f[dirTSW ])[ktsw], - (D.f[dirTSE ])[ktse], - (D.f[dirTNW ])[ktnw], - (D.f[dirBNE ])[kbne], - (D.f[dirBSW ])[kbsw], - (D.f[dirBSE ])[kbse], - (D.f[dirBNW ])[kbnw] - }; -} //////////////////////////////////////////////////////////////////////////////// extern "C" __global__ void LBCalcMac27( real* vxD, @@ -195,8 +27,8 @@ extern "C" __global__ void LBCalcMac27( real* vxD, unsigned int* neighborY, unsigned int* neighborZ, unsigned int size_Mat, - real* DD, - bool evenOrOdd) + real* distributions, + bool isEvenTimestep) { const unsigned int tx = threadIdx.x; // Thread index = lokaler i index const unsigned int by = blockIdx.x; // Block index x @@ -205,10 +37,8 @@ extern "C" __global__ void LBCalcMac27( real* vxD, const unsigned int y = by + STARTOFFY; // Globaler y-Index const unsigned int z = bz + STARTOFFZ; // Globaler z-Index - const unsigned sizeX = blockDim.x; - const unsigned sizeY = gridDim.x; - const unsigned nx = sizeX + 2 * STARTOFFX; - const unsigned ny = sizeY + 2 * STARTOFFY; + const unsigned nx = blockDim.x + 2 * STARTOFFX; + const unsigned ny = gridDim.x + 2 * STARTOFFY; const unsigned int k = nx*(ny*z + y) + x; // Zugriff auf arrays im device @@ -217,15 +47,17 @@ extern "C" __global__ void LBCalcMac27( real* vxD, vyD[k] = c0o1; vzD[k] = c0o1; - if(geoD[k] == GEO_FLUID) - { - const auto distribution = getDistribution(DD, size_Mat, evenOrOdd, k, neighborX, neighborY, neighborZ); + if(!vf::gpu::isValidFluidNode(k, size_Mat, geoD[k])) + return; + + vf::gpu::DistributionWrapper distr_wrapper(distributions, size_Mat, isEvenTimestep, k, neighborX, neighborY, neighborZ); + const auto& distribution = distr_wrapper.distribution; + + rhoD[k] = vf::lbm::getDensity(distribution.f); + vxD[k] = vf::lbm::getIncompressibleVelocityX1(distribution.f); + vyD[k] = vf::lbm::getIncompressibleVelocityX2(distribution.f); + vzD[k] = vf::lbm::getIncompressibleVelocityX3(distribution.f); - rhoD[k] = vf::lbm::getDensity(distribution.f); - vxD[k] = vf::lbm::getIncompressibleVelocityX1(distribution.f); - vyD[k] = vf::lbm::getIncompressibleVelocityX2(distribution.f); - vzD[k] = vf::lbm::getIncompressibleVelocityX3(distribution.f); - } } @@ -426,251 +258,34 @@ extern "C" __global__ void LBCalcMacSP27( real* vxD, } - - - - - - - - - - - - - - - - - - - - - - - - - - //////////////////////////////////////////////////////////////////////////////// -extern "C" __global__ void LBCalcMacCompSP27( real* vxD, - real* vyD, - real* vzD, - real* rhoD, - real* pressD, - unsigned int* geoD, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - unsigned int size_Mat, - real* DD, - bool evenOrOdd) +extern "C" __global__ void LBCalcMacCompSP27(real *vxD, real *vyD, real *vzD, real *rhoD, real *pressD, + unsigned int *geoD, unsigned int *neighborX, unsigned int *neighborY, + unsigned int *neighborZ, unsigned int size_Mat, real *distributions, + bool isEvenTimestep) { - Distributions27 D; - if (evenOrOdd==true) - { - D.f[dirE ] = &DD[dirE *size_Mat]; - D.f[dirW ] = &DD[dirW *size_Mat]; - D.f[dirN ] = &DD[dirN *size_Mat]; - D.f[dirS ] = &DD[dirS *size_Mat]; - D.f[dirT ] = &DD[dirT *size_Mat]; - D.f[dirB ] = &DD[dirB *size_Mat]; - D.f[dirNE ] = &DD[dirNE *size_Mat]; - D.f[dirSW ] = &DD[dirSW *size_Mat]; - D.f[dirSE ] = &DD[dirSE *size_Mat]; - D.f[dirNW ] = &DD[dirNW *size_Mat]; - D.f[dirTE ] = &DD[dirTE *size_Mat]; - D.f[dirBW ] = &DD[dirBW *size_Mat]; - D.f[dirBE ] = &DD[dirBE *size_Mat]; - D.f[dirTW ] = &DD[dirTW *size_Mat]; - D.f[dirTN ] = &DD[dirTN *size_Mat]; - D.f[dirBS ] = &DD[dirBS *size_Mat]; - D.f[dirBN ] = &DD[dirBN *size_Mat]; - D.f[dirTS ] = &DD[dirTS *size_Mat]; - D.f[dirZERO] = &DD[dirZERO*size_Mat]; - D.f[dirTNE ] = &DD[dirTNE *size_Mat]; - D.f[dirTSW ] = &DD[dirTSW *size_Mat]; - D.f[dirTSE ] = &DD[dirTSE *size_Mat]; - D.f[dirTNW ] = &DD[dirTNW *size_Mat]; - D.f[dirBNE ] = &DD[dirBNE *size_Mat]; - D.f[dirBSW ] = &DD[dirBSW *size_Mat]; - D.f[dirBSE ] = &DD[dirBSE *size_Mat]; - D.f[dirBNW ] = &DD[dirBNW *size_Mat]; - } - else - { - D.f[dirW ] = &DD[dirE *size_Mat]; - D.f[dirE ] = &DD[dirW *size_Mat]; - D.f[dirS ] = &DD[dirN *size_Mat]; - D.f[dirN ] = &DD[dirS *size_Mat]; - D.f[dirB ] = &DD[dirT *size_Mat]; - D.f[dirT ] = &DD[dirB *size_Mat]; - D.f[dirSW ] = &DD[dirNE *size_Mat]; - D.f[dirNE ] = &DD[dirSW *size_Mat]; - D.f[dirNW ] = &DD[dirSE *size_Mat]; - D.f[dirSE ] = &DD[dirNW *size_Mat]; - D.f[dirBW ] = &DD[dirTE *size_Mat]; - D.f[dirTE ] = &DD[dirBW *size_Mat]; - D.f[dirTW ] = &DD[dirBE *size_Mat]; - D.f[dirBE ] = &DD[dirTW *size_Mat]; - D.f[dirBS ] = &DD[dirTN *size_Mat]; - D.f[dirTN ] = &DD[dirBS *size_Mat]; - D.f[dirTS ] = &DD[dirBN *size_Mat]; - D.f[dirBN ] = &DD[dirTS *size_Mat]; - D.f[dirZERO] = &DD[dirZERO*size_Mat]; - D.f[dirTNE ] = &DD[dirBSW *size_Mat]; - D.f[dirTSW ] = &DD[dirBNE *size_Mat]; - D.f[dirTSE ] = &DD[dirBNW *size_Mat]; - D.f[dirTNW ] = &DD[dirBSE *size_Mat]; - D.f[dirBNE ] = &DD[dirTSW *size_Mat]; - D.f[dirBSW ] = &DD[dirTNE *size_Mat]; - D.f[dirBSE ] = &DD[dirTNW *size_Mat]; - D.f[dirBNW ] = &DD[dirTSE *size_Mat]; - } - //////////////////////////////////////////////////////////////////////////////// - const unsigned x = threadIdx.x; // Globaler x-Index - const unsigned y = blockIdx.x; // Globaler y-Index - const unsigned z = blockIdx.y; // Globaler z-Index - - const unsigned nx = blockDim.x; - const unsigned ny = gridDim.x; - - const unsigned k = nx*(ny*z + y) + x; - ////////////////////////////////////////////////////////////////////////// - - if(k<size_Mat) - { - ////////////////////////////////////////////////////////////////////////// - //index - unsigned int kzero= k; - unsigned int ke = k; - unsigned int kw = neighborX[k]; - unsigned int kn = k; - unsigned int ks = neighborY[k]; - unsigned int kt = k; - unsigned int kb = neighborZ[k]; - unsigned int ksw = neighborY[kw]; - unsigned int kne = k; - unsigned int kse = ks; - unsigned int knw = kw; - unsigned int kbw = neighborZ[kw]; - unsigned int kte = k; - unsigned int kbe = kb; - unsigned int ktw = kw; - unsigned int kbs = neighborZ[ks]; - unsigned int ktn = k; - unsigned int kbn = kb; - unsigned int kts = ks; - unsigned int ktse = ks; - unsigned int kbnw = kbw; - unsigned int ktnw = kw; - unsigned int kbse = kbs; - unsigned int ktsw = ksw; - unsigned int kbne = kb; - unsigned int ktne = k; - unsigned int kbsw = neighborZ[ksw]; - ////////////////////////////////////////////////////////////////////////// - pressD[k] = c0o1; - rhoD[k] = c0o1; - vxD[k] = c0o1; - vyD[k] = c0o1; - vzD[k] = c0o1; - - if(geoD[k] == GEO_FLUID || geoD[k] == GEO_PM_0 || geoD[k] == GEO_PM_1 || geoD[k] == GEO_PM_2) - { - rhoD[k] = (D.f[dirE ])[ke ]+ (D.f[dirW ])[kw ]+ - (D.f[dirN ])[kn ]+ (D.f[dirS ])[ks ]+ - (D.f[dirT ])[kt ]+ (D.f[dirB ])[kb ]+ - (D.f[dirNE ])[kne ]+ (D.f[dirSW ])[ksw ]+ - (D.f[dirSE ])[kse ]+ (D.f[dirNW ])[knw ]+ - (D.f[dirTE ])[kte ]+ (D.f[dirBW ])[kbw ]+ - (D.f[dirBE ])[kbe ]+ (D.f[dirTW ])[ktw ]+ - (D.f[dirTN ])[ktn ]+ (D.f[dirBS ])[kbs ]+ - (D.f[dirBN ])[kbn ]+ (D.f[dirTS ])[kts ]+ - (D.f[dirZERO])[kzero]+ - (D.f[dirTNE ])[ktne]+ (D.f[dirTSW ])[ktsw]+ - (D.f[dirTSE ])[ktse]+ (D.f[dirTNW ])[ktnw]+ - (D.f[dirBNE ])[kbne]+ (D.f[dirBSW ])[kbsw]+ - (D.f[dirBSE ])[kbse]+ (D.f[dirBNW ])[kbnw]; - - vxD[k] = ((D.f[dirE ])[ke ]- (D.f[dirW ])[kw ]+ - (D.f[dirNE ])[kne ]- (D.f[dirSW ])[ksw ]+ - (D.f[dirSE ])[kse ]- (D.f[dirNW ])[knw ]+ - (D.f[dirTE ])[kte ]- (D.f[dirBW ])[kbw ]+ - (D.f[dirBE ])[kbe ]- (D.f[dirTW ])[ktw ]+ - (D.f[dirTNE ])[ktne]- (D.f[dirTSW ])[ktsw]+ - (D.f[dirTSE ])[ktse]- (D.f[dirTNW ])[ktnw]+ - (D.f[dirBNE ])[kbne]- (D.f[dirBSW ])[kbsw]+ - (D.f[dirBSE ])[kbse]- (D.f[dirBNW ])[kbnw]) / (c1o1 + rhoD[k]); - - vyD[k] = ((D.f[dirN ])[kn ]- (D.f[dirS ])[ks ]+ - (D.f[dirNE ])[kne ]- (D.f[dirSW ])[ksw ]- - (D.f[dirSE ])[kse ]+ (D.f[dirNW ])[knw ]+ - (D.f[dirTN ])[ktn ]- (D.f[dirBS ])[kbs ]+ - (D.f[dirBN ])[kbn ]- (D.f[dirTS ])[kts ]+ - (D.f[dirTNE ])[ktne]- (D.f[dirTSW ])[ktsw]- - (D.f[dirTSE ])[ktse]+ (D.f[dirTNW ])[ktnw]+ - (D.f[dirBNE ])[kbne]- (D.f[dirBSW ])[kbsw]- - (D.f[dirBSE ])[kbse]+ (D.f[dirBNW ])[kbnw]) / (c1o1 + rhoD[k]); - - vzD[k] = ((D.f[dirT ])[kt ]- (D.f[dirB ])[kb ]+ - (D.f[dirTE ])[kte ]- (D.f[dirBW ])[kbw ]- - (D.f[dirBE ])[kbe ]+ (D.f[dirTW ])[ktw ]+ - (D.f[dirTN ])[ktn ]- (D.f[dirBS ])[kbs ]- - (D.f[dirBN ])[kbn ]+ (D.f[dirTS ])[kts ]+ - (D.f[dirTNE ])[ktne]+ (D.f[dirTSW ])[ktsw]+ - (D.f[dirTSE ])[ktse]+ (D.f[dirTNW ])[ktnw]- - (D.f[dirBNE ])[kbne]- (D.f[dirBSW ])[kbsw]- - (D.f[dirBSE ])[kbse]- (D.f[dirBNW ])[kbnw]) / (c1o1 + rhoD[k]); - - pressD[k] = ((D.f[dirE ])[ke ]+ (D.f[dirW ])[kw ]+ - (D.f[dirN ])[kn ]+ (D.f[dirS ])[ks ]+ - (D.f[dirT ])[kt ]+ (D.f[dirB ])[kb ]+ - c2o1*( - (D.f[dirNE ])[kne ]+ (D.f[dirSW ])[ksw ]+ - (D.f[dirSE ])[kse ]+ (D.f[dirNW ])[knw ]+ - (D.f[dirTE ])[kte ]+ (D.f[dirBW ])[kbw ]+ - (D.f[dirBE ])[kbe ]+ (D.f[dirTW ])[ktw ]+ - (D.f[dirTN ])[ktn ]+ (D.f[dirBS ])[kbs ]+ - (D.f[dirBN ])[kbn ]+ (D.f[dirTS ])[kts ])+ - c3o1*( - (D.f[dirTNE ])[ktne]+ (D.f[dirTSW ])[ktsw]+ - (D.f[dirTSE ])[ktse]+ (D.f[dirTNW ])[ktnw]+ - (D.f[dirBNE ])[kbne]+ (D.f[dirBSW ])[kbsw]+ - (D.f[dirBSE ])[kbse]+ (D.f[dirBNW ])[kbnw])- - rhoD[k]-(vxD[k] * vxD[k] + vyD[k] * vyD[k] + vzD[k] * vzD[k]) * (c1o1+rhoD[k])) * c1o2+rhoD[k]; // times zero for incompressible case - //achtung op hart gesetzt Annahme op = 1 ; ^^^^(1.0/op-0.5)=0.5 - - } - } + const unsigned k = vf::gpu::getNodeIndex(); + + pressD[k] = c0o1; + rhoD[k] = c0o1; + vxD[k] = c0o1; + vyD[k] = c0o1; + vzD[k] = c0o1; + + if (!vf::gpu::isValidFluidNode(k, size_Mat, geoD[k])) + return; + + vf::gpu::DistributionWrapper distr_wrapper(distributions, size_Mat, isEvenTimestep, k, neighborX, neighborY, + neighborZ); + const auto &distribution = distr_wrapper.distribution; + + rhoD[k] = vf::lbm::getDensity(distribution.f); + vxD[k] = vf::lbm::getCompressibleVelocityX1(distribution.f, rhoD[k]); + vyD[k] = vf::lbm::getCompressibleVelocityX2(distribution.f, rhoD[k]); + vzD[k] = vf::lbm::getCompressibleVelocityX3(distribution.f, rhoD[k]); + pressD[k] = vf::lbm::getPressure(distribution.f, rhoD[k], vxD[k], vyD[k], vzD[k]); } - - - - - - - - - - - - - - - - - - - - - - - - - - //////////////////////////////////////////////////////////////////////////////// extern "C" __global__ void LBCalcMacThS7( real* Conc, unsigned int* geoD, diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17Unified/CumulantK17Unified.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17Unified/CumulantK17Unified.cu index 4782f1cb2cc48b84e86fbb6e4a6eaf752afc3606..dae9436bf1350f0ddced35ea4f7a6c63a563b922 100644 --- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17Unified/CumulantK17Unified.cu +++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17Unified/CumulantK17Unified.cu @@ -28,12 +28,18 @@ void CumulantK17Unified::run() dim3 threads(numberOfThreads, 1, 1); vf::gpu::LB_Kernel_CumulantK17Unified<<<grid, threads>>>( - para->getParD(level)->omega, para->getParD(level)->geoSP, para->getParD(level)->neighborX_SP, - para->getParD(level)->neighborY_SP, para->getParD(level)->neighborZ_SP, para->getParD(level)->d0SP.f[0], - para->getParD(level)->size_Mat_SP, level, para->getForcesDev(), para->getQuadricLimitersDev(), + para->getParD(level)->omega, + para->getParD(level)->geoSP, + para->getParD(level)->neighborX_SP, + para->getParD(level)->neighborY_SP, + para->getParD(level)->neighborZ_SP, + para->getParD(level)->d0SP.f[0], + para->getParD(level)->size_Mat_SP, + level, + para->getForcesDev(), para->getParD(level)->evenOrOdd); - getLastCudaError("LB_Kernel_CumulantK17Comp execution failed"); + getLastCudaError("LB_Kernel_CumulantK17Unified execution failed"); } CumulantK17Unified::CumulantK17Unified(std::shared_ptr<Parameter> para, int level) diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17Unified/CumulantK17Unified_Device.cuh b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17Unified/CumulantK17Unified_Device.cuh index f1d9d2ec9e7abd3d2c4df853b543a4c25792e1bd..ed8cc6e66343b4367e0addbc05f4763f4c2a957d 100644 --- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17Unified/CumulantK17Unified_Device.cuh +++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17Unified/CumulantK17Unified_Device.cuh @@ -9,13 +9,11 @@ namespace vf { namespace gpu { - -extern "C" __global__ void LB_Kernel_CumulantK17Unified(real omega, unsigned int *bcMatD, unsigned int *neighborX, +__global__ void LB_Kernel_CumulantK17Unified(real omega, unsigned int *bcMatD, unsigned int *neighborX, unsigned int *neighborY, unsigned int *neighborZ, real *DDStart, - int size_Mat, int level, real *forces, real *quadricLimiters, + int size_Mat, int level, real *forces, bool EvenOrOdd); - } } diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17Unified/CumulantK17Unified_device.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17Unified/CumulantK17Unified_device.cu index 6b6053915ceff777bd54dc5ed9e5d5acfd07d7ae..973a919623533561a36ea57aed8599045b2aba19 100644 --- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17Unified/CumulantK17Unified_device.cu +++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17Unified/CumulantK17Unified_device.cu @@ -41,7 +41,7 @@ namespace vf namespace gpu { -extern "C" __global__ void LB_Kernel_CumulantK17Unified( +__global__ void LB_Kernel_CumulantK17Unified( real omega, uint* typeOfGridNode, uint* neighborX, diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Utilities/DistributionHelper.cu b/src/gpu/VirtualFluids_GPU/Kernel/Utilities/DistributionHelper.cu index 29c70d39ac7d2900160ce164ef31265d34ac64df..c36af216ecf39e7c62c98ad932c07987427c879f 100644 --- a/src/gpu/VirtualFluids_GPU/Kernel/Utilities/DistributionHelper.cu +++ b/src/gpu/VirtualFluids_GPU/Kernel/Utilities/DistributionHelper.cu @@ -7,178 +7,163 @@ namespace vf namespace gpu { -__device__ Distributions27 getDistributions27(real* distributions, unsigned int size_Mat, bool isEvenTimestep) +__device__ __host__ Distributions27 getDistributions27(real *distributions, unsigned int size_Mat, bool isEvenTimestep) { + Distributions27 dist; - if (isEvenTimestep) - { - dist.f[dirE ] = &distributions[dirE *size_Mat]; - dist.f[dirW ] = &distributions[dirW *size_Mat]; - dist.f[dirN ] = &distributions[dirN *size_Mat]; - dist.f[dirS ] = &distributions[dirS *size_Mat]; - dist.f[dirT ] = &distributions[dirT *size_Mat]; - dist.f[dirB ] = &distributions[dirB *size_Mat]; - dist.f[dirNE ] = &distributions[dirNE *size_Mat]; - dist.f[dirSW ] = &distributions[dirSW *size_Mat]; - dist.f[dirSE ] = &distributions[dirSE *size_Mat]; - dist.f[dirNW ] = &distributions[dirNW *size_Mat]; - dist.f[dirTE ] = &distributions[dirTE *size_Mat]; - dist.f[dirBW ] = &distributions[dirBW *size_Mat]; - dist.f[dirBE ] = &distributions[dirBE *size_Mat]; - dist.f[dirTW ] = &distributions[dirTW *size_Mat]; - dist.f[dirTN ] = &distributions[dirTN *size_Mat]; - dist.f[dirBS ] = &distributions[dirBS *size_Mat]; - dist.f[dirBN ] = &distributions[dirBN *size_Mat]; - dist.f[dirTS ] = &distributions[dirTS *size_Mat]; - dist.f[dirREST] = &distributions[dirREST*size_Mat]; - dist.f[dirTNE ] = &distributions[dirTNE *size_Mat]; - dist.f[dirTSW ] = &distributions[dirTSW *size_Mat]; - dist.f[dirTSE ] = &distributions[dirTSE *size_Mat]; - dist.f[dirTNW ] = &distributions[dirTNW *size_Mat]; - dist.f[dirBNE ] = &distributions[dirBNE *size_Mat]; - dist.f[dirBSW ] = &distributions[dirBSW *size_Mat]; - dist.f[dirBSE ] = &distributions[dirBSE *size_Mat]; - dist.f[dirBNW ] = &distributions[dirBNW *size_Mat]; - } - else - { - dist.f[dirW ] = &distributions[dirE *size_Mat]; - dist.f[dirE ] = &distributions[dirW *size_Mat]; - dist.f[dirS ] = &distributions[dirN *size_Mat]; - dist.f[dirN ] = &distributions[dirS *size_Mat]; - dist.f[dirB ] = &distributions[dirT *size_Mat]; - dist.f[dirT ] = &distributions[dirB *size_Mat]; - dist.f[dirSW ] = &distributions[dirNE *size_Mat]; - dist.f[dirNE ] = &distributions[dirSW *size_Mat]; - dist.f[dirNW ] = &distributions[dirSE *size_Mat]; - dist.f[dirSE ] = &distributions[dirNW *size_Mat]; - dist.f[dirBW ] = &distributions[dirTE *size_Mat]; - dist.f[dirTE ] = &distributions[dirBW *size_Mat]; - dist.f[dirTW ] = &distributions[dirBE *size_Mat]; - dist.f[dirBE ] = &distributions[dirTW *size_Mat]; - dist.f[dirBS ] = &distributions[dirTN *size_Mat]; - dist.f[dirTN ] = &distributions[dirBS *size_Mat]; - dist.f[dirTS ] = &distributions[dirBN *size_Mat]; - dist.f[dirBN ] = &distributions[dirTS *size_Mat]; - dist.f[dirREST] = &distributions[dirREST*size_Mat]; - dist.f[dirBSW ] = &distributions[dirTNE *size_Mat]; - dist.f[dirBNE ] = &distributions[dirTSW *size_Mat]; - dist.f[dirBNW ] = &distributions[dirTSE *size_Mat]; - dist.f[dirBSE ] = &distributions[dirTNW *size_Mat]; - dist.f[dirTSW ] = &distributions[dirBNE *size_Mat]; - dist.f[dirTNE ] = &distributions[dirBSW *size_Mat]; - dist.f[dirTNW ] = &distributions[dirBSE *size_Mat]; - dist.f[dirTSE ] = &distributions[dirBNW *size_Mat]; + + if (isEvenTimestep) { + dist.f[dirE] = &distributions[dirE * size_Mat]; + dist.f[dirW] = &distributions[dirW * size_Mat]; + dist.f[dirN] = &distributions[dirN * size_Mat]; + dist.f[dirS] = &distributions[dirS * size_Mat]; + dist.f[dirT] = &distributions[dirT * size_Mat]; + dist.f[dirB] = &distributions[dirB * size_Mat]; + dist.f[dirNE] = &distributions[dirNE * size_Mat]; + dist.f[dirSW] = &distributions[dirSW * size_Mat]; + dist.f[dirSE] = &distributions[dirSE * size_Mat]; + dist.f[dirNW] = &distributions[dirNW * size_Mat]; + dist.f[dirTE] = &distributions[dirTE * size_Mat]; + dist.f[dirBW] = &distributions[dirBW * size_Mat]; + dist.f[dirBE] = &distributions[dirBE * size_Mat]; + dist.f[dirTW] = &distributions[dirTW * size_Mat]; + dist.f[dirTN] = &distributions[dirTN * size_Mat]; + dist.f[dirBS] = &distributions[dirBS * size_Mat]; + dist.f[dirBN] = &distributions[dirBN * size_Mat]; + dist.f[dirTS] = &distributions[dirTS * size_Mat]; + dist.f[dirREST] = &distributions[dirREST * size_Mat]; + dist.f[dirTNE] = &distributions[dirTNE * size_Mat]; + dist.f[dirTSW] = &distributions[dirTSW * size_Mat]; + dist.f[dirTSE] = &distributions[dirTSE * size_Mat]; + dist.f[dirTNW] = &distributions[dirTNW * size_Mat]; + dist.f[dirBNE] = &distributions[dirBNE * size_Mat]; + dist.f[dirBSW] = &distributions[dirBSW * size_Mat]; + dist.f[dirBSE] = &distributions[dirBSE * size_Mat]; + dist.f[dirBNW] = &distributions[dirBNW * size_Mat]; + } else { + dist.f[dirW] = &distributions[dirE * size_Mat]; + dist.f[dirE] = &distributions[dirW * size_Mat]; + dist.f[dirS] = &distributions[dirN * size_Mat]; + dist.f[dirN] = &distributions[dirS * size_Mat]; + dist.f[dirB] = &distributions[dirT * size_Mat]; + dist.f[dirT] = &distributions[dirB * size_Mat]; + dist.f[dirSW] = &distributions[dirNE * size_Mat]; + dist.f[dirNE] = &distributions[dirSW * size_Mat]; + dist.f[dirNW] = &distributions[dirSE * size_Mat]; + dist.f[dirSE] = &distributions[dirNW * size_Mat]; + dist.f[dirBW] = &distributions[dirTE * size_Mat]; + dist.f[dirTE] = &distributions[dirBW * size_Mat]; + dist.f[dirTW] = &distributions[dirBE * size_Mat]; + dist.f[dirBE] = &distributions[dirTW * size_Mat]; + dist.f[dirBS] = &distributions[dirTN * size_Mat]; + dist.f[dirTN] = &distributions[dirBS * size_Mat]; + dist.f[dirTS] = &distributions[dirBN * size_Mat]; + dist.f[dirBN] = &distributions[dirTS * size_Mat]; + dist.f[dirREST] = &distributions[dirREST * size_Mat]; + dist.f[dirBSW] = &distributions[dirTNE * size_Mat]; + dist.f[dirBNE] = &distributions[dirTSW * size_Mat]; + dist.f[dirBNW] = &distributions[dirTSE * size_Mat]; + dist.f[dirBSE] = &distributions[dirTNW * size_Mat]; + dist.f[dirTSW] = &distributions[dirBNE * size_Mat]; + dist.f[dirTNE] = &distributions[dirBSW * size_Mat]; + dist.f[dirTNW] = &distributions[dirBSE * size_Mat]; + dist.f[dirTSE] = &distributions[dirBNW * size_Mat]; } return dist; } - -__device__ DistributionWrapper::DistributionWrapper( - real* distributions, - unsigned int size_Mat, - bool isEvenTimestep, - uint k, - uint* neighborX, - uint* neighborY, - uint* neighborZ) : - dist(getDistributions27(distributions, size_Mat, isEvenTimestep)), - k(k), - kw (neighborX[k]), - ks (neighborY[k]), - kb (neighborZ[k]), - ksw (neighborY[kw]), - kbw (neighborZ[kw]), - kbs (neighborZ[ks]), - kbsw(neighborZ[ksw]) -{ +__device__ DistributionWrapper::DistributionWrapper(real *distributions, unsigned int size_Mat, bool isEvenTimestep, + uint k, uint *neighborX, uint *neighborY, uint *neighborZ) + : dist(getDistributions27(distributions, size_Mat, isEvenTimestep)), k(k), kw(neighborX[k]), ks(neighborY[k]), + kb(neighborZ[k]), ksw(neighborY[kw]), kbw(neighborZ[kw]), kbs(neighborZ[ks]), kbsw(neighborZ[ksw]) +{ read(); } __device__ void DistributionWrapper::read() { - distribution.f[vf::lbm::dir::PZZ] = (dist.f[dirE ])[k]; - distribution.f[vf::lbm::dir::MZZ] = (dist.f[dirW ])[kw]; - distribution.f[vf::lbm::dir::ZPZ] = (dist.f[dirN ])[k]; - distribution.f[vf::lbm::dir::ZMZ] = (dist.f[dirS ])[ks]; - distribution.f[vf::lbm::dir::ZZP] = (dist.f[dirT ])[k]; - distribution.f[vf::lbm::dir::ZZM] = (dist.f[dirB ])[kb]; - distribution.f[vf::lbm::dir::PPZ] = (dist.f[dirNE ])[k]; - distribution.f[vf::lbm::dir::MMZ] = (dist.f[dirSW ])[ksw]; - distribution.f[vf::lbm::dir::PMZ] = (dist.f[dirSE ])[ks]; - distribution.f[vf::lbm::dir::MPZ] = (dist.f[dirNW ])[kw]; - distribution.f[vf::lbm::dir::PZP] = (dist.f[dirTE ])[k]; - distribution.f[vf::lbm::dir::MZM] = (dist.f[dirBW ])[kbw]; - distribution.f[vf::lbm::dir::PZM] = (dist.f[dirBE ])[kb]; - distribution.f[vf::lbm::dir::MZP] = (dist.f[dirTW ])[kw]; - distribution.f[vf::lbm::dir::ZPP] = (dist.f[dirTN ])[k]; - distribution.f[vf::lbm::dir::ZMM] = (dist.f[dirBS ])[kbs]; - distribution.f[vf::lbm::dir::ZPM] = (dist.f[dirBN ])[kb]; - distribution.f[vf::lbm::dir::ZMP] = (dist.f[dirTS ])[ks]; - distribution.f[vf::lbm::dir::PPP] = (dist.f[dirTNE ])[k]; - distribution.f[vf::lbm::dir::MPP] = (dist.f[dirTNW ])[kw]; - distribution.f[vf::lbm::dir::PMP] = (dist.f[dirTSE ])[ks]; - distribution.f[vf::lbm::dir::MMP] = (dist.f[dirTSW ])[ksw]; - distribution.f[vf::lbm::dir::PPM] = (dist.f[dirBNE ])[kb]; - distribution.f[vf::lbm::dir::MPM] = (dist.f[dirBNW ])[kbw]; - distribution.f[vf::lbm::dir::PMM] = (dist.f[dirBSE ])[kbs]; - distribution.f[vf::lbm::dir::MMM] = (dist.f[dirBSW ])[kbsw]; + distribution.f[vf::lbm::dir::PZZ] = (dist.f[dirE])[k]; + distribution.f[vf::lbm::dir::MZZ] = (dist.f[dirW])[kw]; + distribution.f[vf::lbm::dir::ZPZ] = (dist.f[dirN])[k]; + distribution.f[vf::lbm::dir::ZMZ] = (dist.f[dirS])[ks]; + distribution.f[vf::lbm::dir::ZZP] = (dist.f[dirT])[k]; + distribution.f[vf::lbm::dir::ZZM] = (dist.f[dirB])[kb]; + distribution.f[vf::lbm::dir::PPZ] = (dist.f[dirNE])[k]; + distribution.f[vf::lbm::dir::MMZ] = (dist.f[dirSW])[ksw]; + distribution.f[vf::lbm::dir::PMZ] = (dist.f[dirSE])[ks]; + distribution.f[vf::lbm::dir::MPZ] = (dist.f[dirNW])[kw]; + distribution.f[vf::lbm::dir::PZP] = (dist.f[dirTE])[k]; + distribution.f[vf::lbm::dir::MZM] = (dist.f[dirBW])[kbw]; + distribution.f[vf::lbm::dir::PZM] = (dist.f[dirBE])[kb]; + distribution.f[vf::lbm::dir::MZP] = (dist.f[dirTW])[kw]; + distribution.f[vf::lbm::dir::ZPP] = (dist.f[dirTN])[k]; + distribution.f[vf::lbm::dir::ZMM] = (dist.f[dirBS])[kbs]; + distribution.f[vf::lbm::dir::ZPM] = (dist.f[dirBN])[kb]; + distribution.f[vf::lbm::dir::ZMP] = (dist.f[dirTS])[ks]; + distribution.f[vf::lbm::dir::PPP] = (dist.f[dirTNE])[k]; + distribution.f[vf::lbm::dir::MPP] = (dist.f[dirTNW])[kw]; + distribution.f[vf::lbm::dir::PMP] = (dist.f[dirTSE])[ks]; + distribution.f[vf::lbm::dir::MMP] = (dist.f[dirTSW])[ksw]; + distribution.f[vf::lbm::dir::PPM] = (dist.f[dirBNE])[kb]; + distribution.f[vf::lbm::dir::MPM] = (dist.f[dirBNW])[kbw]; + distribution.f[vf::lbm::dir::PMM] = (dist.f[dirBSE])[kbs]; + distribution.f[vf::lbm::dir::MMM] = (dist.f[dirBSW])[kbsw]; distribution.f[vf::lbm::dir::ZZZ] = (dist.f[dirREST])[k]; } __device__ void DistributionWrapper::write() { - (dist.f[dirE ])[k] = distribution.f[vf::lbm::dir::PZZ]; - (dist.f[dirW ])[kw] = distribution.f[vf::lbm::dir::MZZ]; - (dist.f[dirN ])[k] = distribution.f[vf::lbm::dir::ZPZ]; - (dist.f[dirS ])[ks] = distribution.f[vf::lbm::dir::ZMZ]; - (dist.f[dirT ])[k] = distribution.f[vf::lbm::dir::ZZP]; - (dist.f[dirB ])[kb] = distribution.f[vf::lbm::dir::ZZM]; - (dist.f[dirNE ])[k] = distribution.f[vf::lbm::dir::PPZ]; - (dist.f[dirSW ])[ksw] = distribution.f[vf::lbm::dir::MMZ]; - (dist.f[dirSE ])[ks] = distribution.f[vf::lbm::dir::PMZ]; - (dist.f[dirNW ])[kw] = distribution.f[vf::lbm::dir::MPZ]; - (dist.f[dirTE ])[k] = distribution.f[vf::lbm::dir::PZP]; - (dist.f[dirBW ])[kbw] = distribution.f[vf::lbm::dir::MZM]; - (dist.f[dirBE ])[kb] = distribution.f[vf::lbm::dir::PZM]; - (dist.f[dirTW ])[kw] = distribution.f[vf::lbm::dir::MZP]; - (dist.f[dirTN ])[k] = distribution.f[vf::lbm::dir::ZPP]; - (dist.f[dirBS ])[kbs] = distribution.f[vf::lbm::dir::ZMM]; - (dist.f[dirBN ])[kb] = distribution.f[vf::lbm::dir::ZPM]; - (dist.f[dirTS ])[ks] = distribution.f[vf::lbm::dir::ZMP]; - (dist.f[dirTNE ])[k] = distribution.f[vf::lbm::dir::PPP]; - (dist.f[dirTNW ])[kw] = distribution.f[vf::lbm::dir::MPP]; - (dist.f[dirTSE ])[ks] = distribution.f[vf::lbm::dir::PMP]; - (dist.f[dirTSW ])[ksw] = distribution.f[vf::lbm::dir::MMP]; - (dist.f[dirBNE ])[kb] = distribution.f[vf::lbm::dir::PPM]; - (dist.f[dirBNW ])[kbw] = distribution.f[vf::lbm::dir::MPM]; - (dist.f[dirBSE ])[kbs] = distribution.f[vf::lbm::dir::PMM]; - (dist.f[dirBSW ])[kbsw] = distribution.f[vf::lbm::dir::MMM]; - (dist.f[dirREST])[k] = distribution.f[vf::lbm::dir::ZZZ]; + (dist.f[dirE])[k] = distribution.f[vf::lbm::dir::PZZ]; + (dist.f[dirW])[kw] = distribution.f[vf::lbm::dir::MZZ]; + (dist.f[dirN])[k] = distribution.f[vf::lbm::dir::ZPZ]; + (dist.f[dirS])[ks] = distribution.f[vf::lbm::dir::ZMZ]; + (dist.f[dirT])[k] = distribution.f[vf::lbm::dir::ZZP]; + (dist.f[dirB])[kb] = distribution.f[vf::lbm::dir::ZZM]; + (dist.f[dirNE])[k] = distribution.f[vf::lbm::dir::PPZ]; + (dist.f[dirSW])[ksw] = distribution.f[vf::lbm::dir::MMZ]; + (dist.f[dirSE])[ks] = distribution.f[vf::lbm::dir::PMZ]; + (dist.f[dirNW])[kw] = distribution.f[vf::lbm::dir::MPZ]; + (dist.f[dirTE])[k] = distribution.f[vf::lbm::dir::PZP]; + (dist.f[dirBW])[kbw] = distribution.f[vf::lbm::dir::MZM]; + (dist.f[dirBE])[kb] = distribution.f[vf::lbm::dir::PZM]; + (dist.f[dirTW])[kw] = distribution.f[vf::lbm::dir::MZP]; + (dist.f[dirTN])[k] = distribution.f[vf::lbm::dir::ZPP]; + (dist.f[dirBS])[kbs] = distribution.f[vf::lbm::dir::ZMM]; + (dist.f[dirBN])[kb] = distribution.f[vf::lbm::dir::ZPM]; + (dist.f[dirTS])[ks] = distribution.f[vf::lbm::dir::ZMP]; + (dist.f[dirTNE])[k] = distribution.f[vf::lbm::dir::PPP]; + (dist.f[dirTNW])[kw] = distribution.f[vf::lbm::dir::MPP]; + (dist.f[dirTSE])[ks] = distribution.f[vf::lbm::dir::PMP]; + (dist.f[dirTSW])[ksw] = distribution.f[vf::lbm::dir::MMP]; + (dist.f[dirBNE])[kb] = distribution.f[vf::lbm::dir::PPM]; + (dist.f[dirBNW])[kbw] = distribution.f[vf::lbm::dir::MPM]; + (dist.f[dirBSE])[kbs] = distribution.f[vf::lbm::dir::PMM]; + (dist.f[dirBSW])[kbsw] = distribution.f[vf::lbm::dir::MMM]; + (dist.f[dirREST])[k] = distribution.f[vf::lbm::dir::ZZZ]; } __device__ unsigned int getNodeIndex() { - const unsigned x = threadIdx.x; - const unsigned y = blockIdx.x; - const unsigned z = blockIdx.y; + const unsigned x = threadIdx.x; + const unsigned y = blockIdx.x; + const unsigned z = blockIdx.y; const unsigned nx = blockDim.x; const unsigned ny = gridDim.x; - return nx*(ny*z + y) + x; + return nx * (ny * z + y) + x; } __device__ bool isValidFluidNode(uint k, int size_Mat, uint nodeType) { - return (k < size_Mat) && (nodeType == GEO_FLUID); + return (k < size_Mat) && + (nodeType == GEO_FLUID || nodeType == GEO_PM_0 || nodeType == GEO_PM_1 || nodeType == GEO_PM_2); } -__device__ void getLevelForce(real fx, real fy, real fz, int level, real* forces) +__device__ void getLevelForce(real fx, real fy, real fz, int level, real *forces) { - real fx_t {1.}, fy_t {1.}, fz_t {1.}; - for (int i = 0; i < level; i++) - { + real fx_t{ 1. }, fy_t{ 1. }, fz_t{ 1. }; + for (int i = 0; i < level; i++) { fx_t *= vf::lbm::constant::c2o1; fy_t *= vf::lbm::constant::c2o1; fz_t *= vf::lbm::constant::c2o1; @@ -189,6 +174,5 @@ __device__ void getLevelForce(real fx, real fy, real fz, int level, real* forces forces[2] = fz / fz_t; } - -} -} +} // namespace gpu +} // namespace vf diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Utilities/DistributionHelper.cuh b/src/gpu/VirtualFluids_GPU/Kernel/Utilities/DistributionHelper.cuh index d69a74af2142837aba4bdedd10fe05c6de04ec84..60bdfd7e51dbfd100100464a727579e2fcb6a253 100644 --- a/src/gpu/VirtualFluids_GPU/Kernel/Utilities/DistributionHelper.cuh +++ b/src/gpu/VirtualFluids_GPU/Kernel/Utilities/DistributionHelper.cuh @@ -44,7 +44,7 @@ namespace vf namespace gpu { -__device__ Distributions27 getDistributions27(real* distributions, unsigned int size_Mat, bool isEvenTimestep); +__device__ __host__ Distributions27 getDistributions27(real* distributions, unsigned int size_Mat, bool isEvenTimestep); struct DistributionWrapper { diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Utilities/DistributionHelperTests.cpp b/src/gpu/VirtualFluids_GPU/Kernel/Utilities/DistributionHelperTests.cpp new file mode 100644 index 0000000000000000000000000000000000000000..0d0e620ad83fd43d5654c450a9fc97936e1483b4 --- /dev/null +++ b/src/gpu/VirtualFluids_GPU/Kernel/Utilities/DistributionHelperTests.cpp @@ -0,0 +1,93 @@ +#include <gmock/gmock.h> + +#include "DistributionHelper.cuh" + +#include "LBM/D3Q27.h" + + +auto RealEq = [](auto value) { +#ifdef VF_DOUBLE_ACCURACY + return testing::DoubleEq(value); +#else + return testing::FloatEq(value); +#endif +}; + + +TEST(DistributionHelperTests, getPointerToDistribution_WhenEvenTimeStep_ShouldBeEqualToInput) +{ + real distributions_in[27]; + for (int i = 0; i < 27; i++) + distributions_in[i] = i; + const uint size_Mat = 1; + const bool isEvenTimeStep = true; + + Distributions27 distribution_out = vf::gpu::getDistributions27(distributions_in, size_Mat, isEvenTimeStep); + + EXPECT_THAT(*distribution_out.f[dirE], RealEq(distributions_in[dirE])); + EXPECT_THAT(*distribution_out.f[dirW], RealEq(distributions_in[dirW])); + EXPECT_THAT(*distribution_out.f[dirN], RealEq(distributions_in[dirN])); + EXPECT_THAT(*distribution_out.f[dirS], RealEq(distributions_in[dirS])); + EXPECT_THAT(*distribution_out.f[dirT], RealEq(distributions_in[dirT])); + EXPECT_THAT(*distribution_out.f[dirB], RealEq(distributions_in[dirB])); + EXPECT_THAT(*distribution_out.f[dirNE], RealEq(distributions_in[dirNE])); + EXPECT_THAT(*distribution_out.f[dirSW], RealEq(distributions_in[dirSW])); + EXPECT_THAT(*distribution_out.f[dirSE], RealEq(distributions_in[dirSE])); + EXPECT_THAT(*distribution_out.f[dirNW], RealEq(distributions_in[dirNW])); + EXPECT_THAT(*distribution_out.f[dirTE], RealEq(distributions_in[dirTE])); + EXPECT_THAT(*distribution_out.f[dirBW], RealEq(distributions_in[dirBW])); + EXPECT_THAT(*distribution_out.f[dirBE], RealEq(distributions_in[dirBE])); + EXPECT_THAT(*distribution_out.f[dirTW], RealEq(distributions_in[dirTW])); + EXPECT_THAT(*distribution_out.f[dirTN], RealEq(distributions_in[dirTN])); + EXPECT_THAT(*distribution_out.f[dirBS], RealEq(distributions_in[dirBS])); + EXPECT_THAT(*distribution_out.f[dirBN], RealEq(distributions_in[dirBN])); + EXPECT_THAT(*distribution_out.f[dirTS], RealEq(distributions_in[dirTS])); + EXPECT_THAT(*distribution_out.f[dirREST], RealEq(distributions_in[dirREST])); + EXPECT_THAT(*distribution_out.f[dirTNE], RealEq(distributions_in[dirTNE])); + EXPECT_THAT(*distribution_out.f[dirTSW], RealEq(distributions_in[dirTSW])); + EXPECT_THAT(*distribution_out.f[dirTSE], RealEq(distributions_in[dirTSE])); + EXPECT_THAT(*distribution_out.f[dirTNW], RealEq(distributions_in[dirTNW])); + EXPECT_THAT(*distribution_out.f[dirBNE], RealEq(distributions_in[dirBNE])); + EXPECT_THAT(*distribution_out.f[dirBSW], RealEq(distributions_in[dirBSW])); + EXPECT_THAT(*distribution_out.f[dirBSE], RealEq(distributions_in[dirBSE])); + EXPECT_THAT(*distribution_out.f[dirBNW], RealEq(distributions_in[dirBNW])); +} + +TEST(DistributionHelperTests, getPointerToDistribution_WhenOddTimeStep_ShouldBeSwapped) +{ + real distributions_in[27]; + for (int i = 0; i < 27; i++) + distributions_in[i] = i; + const int size_Mat = 1; + const bool isEvenTimeStep = false; + + Distributions27 distribution_out = vf::gpu::getDistributions27(distributions_in, size_Mat, isEvenTimeStep); + + EXPECT_THAT(*distribution_out.f[dirW], RealEq(distributions_in[dirE])); + EXPECT_THAT(*distribution_out.f[dirE], RealEq(distributions_in[dirW])); + EXPECT_THAT(*distribution_out.f[dirS], RealEq(distributions_in[dirN])); + EXPECT_THAT(*distribution_out.f[dirN], RealEq(distributions_in[dirS])); + EXPECT_THAT(*distribution_out.f[dirB], RealEq(distributions_in[dirT])); + EXPECT_THAT(*distribution_out.f[dirT], RealEq(distributions_in[dirB])); + EXPECT_THAT(*distribution_out.f[dirSW], RealEq(distributions_in[dirNE])); + EXPECT_THAT(*distribution_out.f[dirNE], RealEq(distributions_in[dirSW])); + EXPECT_THAT(*distribution_out.f[dirNW], RealEq(distributions_in[dirSE])); + EXPECT_THAT(*distribution_out.f[dirSE], RealEq(distributions_in[dirNW])); + EXPECT_THAT(*distribution_out.f[dirBW], RealEq(distributions_in[dirTE])); + EXPECT_THAT(*distribution_out.f[dirTE], RealEq(distributions_in[dirBW])); + EXPECT_THAT(*distribution_out.f[dirTW], RealEq(distributions_in[dirBE])); + EXPECT_THAT(*distribution_out.f[dirBE], RealEq(distributions_in[dirTW])); + EXPECT_THAT(*distribution_out.f[dirBS], RealEq(distributions_in[dirTN])); + EXPECT_THAT(*distribution_out.f[dirTN], RealEq(distributions_in[dirBS])); + EXPECT_THAT(*distribution_out.f[dirTS], RealEq(distributions_in[dirBN])); + EXPECT_THAT(*distribution_out.f[dirBN], RealEq(distributions_in[dirTS])); + EXPECT_THAT(*distribution_out.f[dirREST], RealEq(distributions_in[dirREST])); + EXPECT_THAT(*distribution_out.f[dirBSW], RealEq(distributions_in[dirTNE])); + EXPECT_THAT(*distribution_out.f[dirBNE], RealEq(distributions_in[dirTSW])); + EXPECT_THAT(*distribution_out.f[dirBNW], RealEq(distributions_in[dirTSE])); + EXPECT_THAT(*distribution_out.f[dirBSE], RealEq(distributions_in[dirTNW])); + EXPECT_THAT(*distribution_out.f[dirTSW], RealEq(distributions_in[dirBNE])); + EXPECT_THAT(*distribution_out.f[dirTNE], RealEq(distributions_in[dirBSW])); + EXPECT_THAT(*distribution_out.f[dirTNW], RealEq(distributions_in[dirBSE])); + EXPECT_THAT(*distribution_out.f[dirTSE], RealEq(distributions_in[dirBNW])); +} diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Utilities/KernelFactory/KernelFactoryImp.cpp b/src/gpu/VirtualFluids_GPU/Kernel/Utilities/KernelFactory/KernelFactoryImp.cpp index 847ff6afb3d69d3d403415a205992b11512d46f5..b99d44a85fa0de5d35402691f6b4848c7917e946 100644 --- a/src/gpu/VirtualFluids_GPU/Kernel/Utilities/KernelFactory/KernelFactoryImp.cpp +++ b/src/gpu/VirtualFluids_GPU/Kernel/Utilities/KernelFactory/KernelFactoryImp.cpp @@ -97,6 +97,7 @@ void KernelFactoryImp::setKernelAtLevel(std::vector<std::shared_ptr<Kernel>> ker std::shared_ptr<Kernel> KernelFactoryImp::makeKernel(std::shared_ptr<Parameter> para, std::string kernel, int level) { + printf("Instantiating Kernel: %s\n", kernel.c_str()); std::shared_ptr<KernelImp> newKernel; std::shared_ptr<CheckParameterStrategy> checkStrategy; diff --git a/src/lbm/MacroscopicQuantities.h b/src/lbm/MacroscopicQuantities.h index e426c51b27d0003471e1a421107ab01440695db3..d9f942e2b9370673c6cbb45a7b3e4a590f34696f 100644 --- a/src/lbm/MacroscopicQuantities.h +++ b/src/lbm/MacroscopicQuantities.h @@ -10,6 +10,8 @@ #include <basics/Core/DataTypes.h> +#include "constants/NumericConstants.h" + #include "D3Q27.h" namespace vf @@ -17,6 +19,7 @@ namespace vf namespace lbm { + inline __host__ __device__ real getDensity(const real *const &f /*[27]*/) { return ((f[dir::TNE] + f[dir::BSW]) + (f[dir::TSE] + f[dir::BNW])) + ((f[dir::BSE] + f[dir::TNW]) + (f[dir::TSW] + f[dir::BNE])) + @@ -25,7 +28,9 @@ inline __host__ __device__ real getDensity(const real *const &f /*[27]*/) ((f[dir::E] + f[dir::W]) + (f[dir::N] + f[dir::S]) + (f[dir::T] + f[dir::B])) + f[dir::REST]; } - +/* +* Incompressible Macroscopic Quantities +*/ inline __host__ __device__ real getIncompressibleVelocityX1(const real *const &f /*[27]*/) { return ((((f[dir::TNE] - f[dir::BSW]) + (f[dir::TSE] - f[dir::BNW])) + ((f[dir::BSE] - f[dir::TNW]) + (f[dir::BNE] - f[dir::TSW]))) + @@ -48,6 +53,79 @@ inline __host__ __device__ real getIncompressibleVelocityX3(const real *const &f +/* +* Compressible Macroscopic Quantities +*/ +inline __host__ __device__ real getCompressibleVelocityX1(const real *const &f27, const real& rho) +{ + return getIncompressibleVelocityX1(f27) / (rho + constant::c1o1); +} + + +inline __host__ __device__ real getCompressibleVelocityX2(const real *const &f27, const real& rho) +{ + return getIncompressibleVelocityX2(f27) / (rho + constant::c1o1); +} + + +inline __host__ __device__ real getCompressibleVelocityX3(const real *const &f27, const real& rho) +{ + return getIncompressibleVelocityX3(f27) / (rho + constant::c1o1); +} + +/* +* Pressure +*/ +inline __host__ __device__ real getPressure(const real *const &f27, const real& rho, const real& vx, const real& vy, const real& vz) +{ + return (f27[dir::E] + f27[dir::W] + f27[dir::N] + f27[dir::S] + f27[dir::T] + f27[dir::B] + + constant::c2o1 * (f27[dir::NE] + f27[dir::SW] + f27[dir::SE] + f27[dir::NW] + f27[dir::TE] + + f27[dir::BW] + f27[dir::BE] + f27[dir::TW] + f27[dir::TN] + f27[dir::BS] + + f27[dir::BN] + f27[dir::TS]) + + constant::c3o1 * (f27[dir::TNE] + f27[dir::TSW] + f27[dir::TSE] + f27[dir::TNW] + + f27[dir::BNE] + f27[dir::BSW] + f27[dir::BSE] + f27[dir::BNW]) - + rho - (vx * vx + vy * vy + vz * vz) * (constant::c1o1 + rho)) * + constant::c1o2 + rho; +} + +// GPU: LBCalcMacCompSP27 +// rhoD[k] = (D.f[dirE])[ke] + (D.f[dirW])[kw] + (D.f[dirN])[kn] + (D.f[dirS])[ks] + (D.f[dirT])[kt] + (D.f[dirB])[kb] +// + (D.f[dirNE])[kne] + (D.f[dirSW])[ksw] + (D.f[dirSE])[kse] + (D.f[dirNW])[knw] + +// (D.f[dirTE])[kte] + (D.f[dirBW])[kbw] + (D.f[dirBE])[kbe] + (D.f[dirTW])[ktw] + (D.f[dirTN])[ktn] + +// (D.f[dirBS])[kbs] + (D.f[dirBN])[kbn] + (D.f[dirTS])[kts] + (D.f[dirZERO])[kzero] + +// (D.f[dirTNE])[ktne] + (D.f[dirTSW])[ktsw] + (D.f[dirTSE])[ktse] + (D.f[dirTNW])[ktnw] + +// (D.f[dirBNE])[kbne] + (D.f[dirBSW])[kbsw] + (D.f[dirBSE])[kbse] + (D.f[dirBNW])[kbnw]; + +// vxD[k] = ((D.f[dirE])[ke] - (D.f[dirW])[kw] + (D.f[dirNE])[kne] - (D.f[dirSW])[ksw] + (D.f[dirSE])[kse] - +// (D.f[dirNW])[knw] + (D.f[dirTE])[kte] - (D.f[dirBW])[kbw] + (D.f[dirBE])[kbe] - (D.f[dirTW])[ktw] + +// (D.f[dirTNE])[ktne] - (D.f[dirTSW])[ktsw] + (D.f[dirTSE])[ktse] - (D.f[dirTNW])[ktnw] + +// (D.f[dirBNE])[kbne] - (D.f[dirBSW])[kbsw] + (D.f[dirBSE])[kbse] - (D.f[dirBNW])[kbnw]) / +// (c1o1 + rhoD[k]); + +// vyD[k] = ((D.f[dirN])[kn] - (D.f[dirS])[ks] + (D.f[dirNE])[kne] - (D.f[dirSW])[ksw] - (D.f[dirSE])[kse] + +// (D.f[dirNW])[knw] + (D.f[dirTN])[ktn] - (D.f[dirBS])[kbs] + (D.f[dirBN])[kbn] - (D.f[dirTS])[kts] + +// (D.f[dirTNE])[ktne] - (D.f[dirTSW])[ktsw] - (D.f[dirTSE])[ktse] + (D.f[dirTNW])[ktnw] + +// (D.f[dirBNE])[kbne] - (D.f[dirBSW])[kbsw] - (D.f[dirBSE])[kbse] + (D.f[dirBNW])[kbnw]) / +// (c1o1 + rhoD[k]); + +// vzD[k] = ((D.f[dirT])[kt] - (D.f[dirB])[kb] + (D.f[dirTE])[kte] - (D.f[dirBW])[kbw] - (D.f[dirBE])[kbe] + +// (D.f[dirTW])[ktw] + (D.f[dirTN])[ktn] - (D.f[dirBS])[kbs] - (D.f[dirBN])[kbn] + (D.f[dirTS])[kts] + +// (D.f[dirTNE])[ktne] + (D.f[dirTSW])[ktsw] + (D.f[dirTSE])[ktse] + (D.f[dirTNW])[ktnw] - +// (D.f[dirBNE])[kbne] - (D.f[dirBSW])[kbsw] - (D.f[dirBSE])[kbse] - (D.f[dirBNW])[kbnw]) / +// (c1o1 + rhoD[k]); + +// pressD[k] = +// ((D.f[dirE])[ke] + (D.f[dirW])[kw] + (D.f[dirN])[kn] + (D.f[dirS])[ks] + (D.f[dirT])[kt] + (D.f[dirB])[kb] + +// c2o1 * ((D.f[dirNE])[kne] + (D.f[dirSW])[ksw] + (D.f[dirSE])[kse] + (D.f[dirNW])[knw] + (D.f[dirTE])[kte] + +// (D.f[dirBW])[kbw] + (D.f[dirBE])[kbe] + (D.f[dirTW])[ktw] + (D.f[dirTN])[ktn] + (D.f[dirBS])[kbs] + +// (D.f[dirBN])[kbn] + (D.f[dirTS])[kts]) + +// c3o1 * ((D.f[dirTNE])[ktne] + (D.f[dirTSW])[ktsw] + (D.f[dirTSE])[ktse] + (D.f[dirTNW])[ktnw] + +// (D.f[dirBNE])[kbne] + (D.f[dirBSW])[kbsw] + (D.f[dirBSE])[kbse] + (D.f[dirBNW])[kbnw]) - +// rhoD[k] - (vxD[k] * vxD[k] + vyD[k] * vyD[k] + vzD[k] * vzD[k]) * (c1o1 + rhoD[k])) * +// c1o2 + +// rhoD[k]; // times zero for incompressible case +// // achtung op hart gesetzt Annahme op = 1 ; ^^^^(1.0/op-0.5)=0.5 + } }