diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernel.h b/src/gpu/VirtualFluids_GPU/Kernel/Kernel.h index 8932dadb3f92256cba279121193f94dbabdc0a6d..213127e64c5e7bfe2b3d4c9d1154c11d374f9552 100644 --- a/src/gpu/VirtualFluids_GPU/Kernel/Kernel.h +++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernel.h @@ -1,26 +1,19 @@ -#ifndef KERNEL_H -#define KERNEL_H +#ifndef GPU_KERNEL_H +#define GPU_KERNEL_H -#include <DataTypes.h> - -#include <cuda_runtime.h> -#include <helper_functions.h> -#include <helper_cuda.h> +#include <vector> #include "Kernel/Utilities/KernelGroup.h" #include "PreProcessor/PreProcessorType.h" -#include <vector> - - class Kernel { public: - virtual ~Kernel() = default; - virtual void run() = 0; + virtual ~Kernel() = default; + virtual void run() = 0; - virtual bool checkParameter() = 0; - virtual std::vector<PreProcessorType> getPreProcessorTypes() = 0; - virtual KernelGroup getKernelGroup() = 0; + virtual bool checkParameter() = 0; + virtual std::vector<PreProcessorType> getPreProcessorTypes() = 0; + virtual KernelGroup getKernelGroup() = 0; }; -#endif \ No newline at end of file +#endif diff --git a/src/gpu/VirtualFluids_GPU/Kernel/KernelImp.cpp b/src/gpu/VirtualFluids_GPU/Kernel/KernelImp.cpp index 6d06d4c9175c7935bb609c2e342b29c50c42846d..ec961e9d2a83f82e1ce57852ccab4248a99958d1 100644 --- a/src/gpu/VirtualFluids_GPU/Kernel/KernelImp.cpp +++ b/src/gpu/VirtualFluids_GPU/Kernel/KernelImp.cpp @@ -2,26 +2,24 @@ #include "Kernel/Utilities/CheckParameterStrategy/CheckParameterStrategy.h" -bool KernelImp::checkParameter() -{ - return checkStrategy->checkParameter(para); +bool KernelImp::checkParameter() +{ + return checkStrategy->checkParameter(para); } -std::vector<PreProcessorType> KernelImp::getPreProcessorTypes() -{ - return myPreProcessorTypes; +std::vector<PreProcessorType> KernelImp::getPreProcessorTypes() +{ + return myPreProcessorTypes; } -KernelGroup KernelImp::getKernelGroup() -{ - return myKernelGroup; +KernelGroup KernelImp::getKernelGroup() +{ + return myKernelGroup; } void KernelImp::setCheckParameterStrategy(std::shared_ptr<CheckParameterStrategy> strategy) { - this->checkStrategy = strategy; + this->checkStrategy = strategy; } -KernelImp::KernelImp() -{ -} +KernelImp::KernelImp(std::shared_ptr<Parameter> para, int level) : para(para), level(level) {} \ No newline at end of file diff --git a/src/gpu/VirtualFluids_GPU/Kernel/KernelImp.h b/src/gpu/VirtualFluids_GPU/Kernel/KernelImp.h index 2795ab94ea78254eaa37d4f9c0962bac4528ab1f..e293972306529ab7166028000a76db49fb36bf0f 100644 --- a/src/gpu/VirtualFluids_GPU/Kernel/KernelImp.h +++ b/src/gpu/VirtualFluids_GPU/Kernel/KernelImp.h @@ -13,24 +13,25 @@ class Parameter; class KernelImp : public Kernel { public: - virtual void run() = 0; + virtual void run() = 0; - bool checkParameter(); - std::vector<PreProcessorType> getPreProcessorTypes(); - KernelGroup getKernelGroup(); + bool checkParameter(); + std::vector<PreProcessorType> getPreProcessorTypes(); + KernelGroup getKernelGroup(); - void setCheckParameterStrategy(std::shared_ptr<CheckParameterStrategy> strategy); + void setCheckParameterStrategy(std::shared_ptr<CheckParameterStrategy> strategy); protected: - KernelImp(); + KernelImp(std::shared_ptr<Parameter> para, int level); + KernelImp() = default; - std::shared_ptr< Parameter> para; - std::shared_ptr<CheckParameterStrategy> checkStrategy; - int level; - std::vector<PreProcessorType> myPreProcessorTypes; - KernelGroup myKernelGroup; - - vf::gpu::CudaGrid cudaGrid; + std::shared_ptr<Parameter> para; + 
std::shared_ptr<CheckParameterStrategy> checkStrategy; + int level; + std::vector<PreProcessorType> myPreProcessorTypes; + KernelGroup myKernelGroup; + vf::gpu::CudaGrid cudaGrid; }; -#endif \ No newline at end of file + +#endif diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/BGKUnified/BGKUnified.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/BGKUnified/BGKUnified.cu index bd3e08c859163a0852498e148f7e1380ce7c3bdd..4c82851996646590d8c246df5f940b58a308d52c 100644 --- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/BGKUnified/BGKUnified.cu +++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/BGKUnified/BGKUnified.cu @@ -9,14 +9,30 @@ #include <lbm/BGK.h> -std::shared_ptr<BGKUnified> BGKUnified::getNewInstance(std::shared_ptr<Parameter> para, int level) +namespace vf { - return std::make_shared<BGKUnified>(para, level); +namespace gpu +{ + + +BGKUnified::BGKUnified(std::shared_ptr<Parameter> para, int level) + : KernelImp(para, level) +{ +#ifndef BUILD_CUDA_LTO + throw std::invalid_argument("To use the BKGUnified kernel, pass -DBUILD_CUDA_LTO=ON to cmake. Requires: CUDA 11.2 & cc 5.0"); +#endif + + myPreProcessorTypes.push_back(InitCompSP27); + + myKernelGroup = BasicKernel; + + this->cudaGrid = CudaGrid(para->getParD(level)->numberofthreads, para->getParD(level)->size_Mat_SP); } + void BGKUnified::run() { - vf::gpu::GPUKernelParameter kernelParameter{ para->getParD(level)->omega, + GPUKernelParameter kernelParameter{ para->getParD(level)->omega, para->getParD(level)->geoSP, para->getParD(level)->neighborX_SP, para->getParD(level)->neighborY_SP, @@ -26,27 +42,15 @@ void BGKUnified::run() nullptr, /* forces not used in bgk kernel */ para->getParD(level)->evenOrOdd }; - auto lambda = [] __device__(vf::lbm::KernelParameter parameter) { - return vf::lbm::bgk(parameter); + auto lambda = [] __device__(lbm::KernelParameter parameter) { + return lbm::bgk(parameter); }; - vf::gpu::runKernel<<<cudaGrid.grid, cudaGrid.threads>>>(lambda, kernelParameter); + runKernel<<<cudaGrid.grid, cudaGrid.threads>>>(lambda, kernelParameter); getLastCudaError("LB_Kernel_BGKUnified execution failed"); } -BGKUnified::BGKUnified(std::shared_ptr<Parameter> para, int level) -{ -#ifndef BUILD_CUDA_LTO - throw std::invalid_argument("To use the BKGUnified kernel, pass -DBUILD_CUDA_LTO=ON to cmake. 
Requires: CUDA 11.2 & cc 5.0"); -#endif - - this->para = para; - this->level = level; - myPreProcessorTypes.push_back(InitCompSP27); - - myKernelGroup = BasicKernel; - - this->cudaGrid = vf::gpu::CudaGrid(para->getParD(level)->numberofthreads, para->getParD(level)->size_Mat_SP); +} } diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/BGKUnified/BGKUnified.h b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/BGKUnified/BGKUnified.h index d8438173afd3dbb7259b110ef4f2939a11b6e196..762eaaa5935bd01fa6ae002521a40e45cd239dfd 100644 --- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/BGKUnified/BGKUnified.h +++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/BGKUnified/BGKUnified.h @@ -3,13 +3,20 @@ #include "Kernel/KernelImp.h" +namespace vf +{ +namespace gpu +{ + class BGKUnified : public KernelImp { public: - static std::shared_ptr<BGKUnified> getNewInstance(std::shared_ptr<Parameter> para, int level); - void run(); - BGKUnified(std::shared_ptr<Parameter> para, int level); + + void run(); }; +} +} + #endif diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK15Unified/CumulantK15Unified.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK15Unified/CumulantK15Unified.cu index 90f6dad8eea81d52957dc854f9cfd0a3282556f1..b6f5d21ccf909f1ce3bcf11a4558f4771d87d021 100644 --- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK15Unified/CumulantK15Unified.cu +++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK15Unified/CumulantK15Unified.cu @@ -8,14 +8,29 @@ #include <lbm/CumulantChimera.h> -std::shared_ptr<CumulantK15Unified> CumulantK15Unified::getNewInstance(std::shared_ptr<Parameter> para, int level) +namespace vf { - return std::make_shared<CumulantK15Unified>(para, level); +namespace gpu +{ + +CumulantK15Unified::CumulantK15Unified(std::shared_ptr<Parameter> para, int level) + : KernelImp(para, level) +{ +#ifndef BUILD_CUDA_LTO + throw std::invalid_argument( + "To use the CumulantK15Unified kernel, pass -DBUILD_CUDA_LTO=ON to cmake. 
Requires: CUDA 11.2 & cc 5.0"); +#endif + + myPreProcessorTypes.push_back(InitCompSP27); + + myKernelGroup = BasicKernel; + + this->cudaGrid = CudaGrid(para->getParD(level)->numberofthreads, para->getParD(level)->size_Mat_SP); } void CumulantK15Unified::run() { - vf::gpu::GPUKernelParameter kernelParameter{ para->getParD(level)->omega, + GPUKernelParameter kernelParameter{ para->getParD(level)->omega, para->getParD(level)->geoSP, para->getParD(level)->neighborX_SP, para->getParD(level)->neighborY_SP, @@ -25,8 +40,8 @@ void CumulantK15Unified::run() para->getParD(level)->forcing, para->getParD(level)->evenOrOdd }; - auto lambda = [] __device__(vf::lbm::KernelParameter parameter) { - return vf::lbm::cumulantChimera(parameter, vf::lbm::setRelaxationRatesK15); + auto lambda = [] __device__(lbm::KernelParameter parameter) { + return lbm::cumulantChimera(parameter, lbm::setRelaxationRatesK15); }; vf::gpu::runKernel<<<cudaGrid.grid, cudaGrid.threads>>>(lambda, kernelParameter); @@ -34,19 +49,6 @@ void CumulantK15Unified::run() getLastCudaError("LB_Kernel_CumulantK15Comp execution failed"); } -CumulantK15Unified::CumulantK15Unified(std::shared_ptr<Parameter> para, int level) -{ -#ifndef BUILD_CUDA_LTO - throw std::invalid_argument( - "To use the CumulantK15Unified kernel, pass -DBUILD_CUDA_LTO=ON to cmake. Requires: CUDA 11.2 & cc 5.0"); -#endif - - this->para = para; - this->level = level; - - myPreProcessorTypes.push_back(InitCompSP27); - - myKernelGroup = BasicKernel; - this->cudaGrid = vf::gpu::CudaGrid(para->getParD(level)->numberofthreads, para->getParD(level)->size_Mat_SP); } +} \ No newline at end of file diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK15Unified/CumulantK15Unified.h b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK15Unified/CumulantK15Unified.h index 666a605c40cef3d359238280f7c0578b056dbe05..8756253950484e00773af89327589c3d8f157729 100644 --- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK15Unified/CumulantK15Unified.h +++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK15Unified/CumulantK15Unified.h @@ -3,12 +3,19 @@ #include "Kernel/KernelImp.h" +namespace vf +{ +namespace gpu +{ class CumulantK15Unified : public KernelImp { public: - static std::shared_ptr<CumulantK15Unified> getNewInstance(std::shared_ptr<Parameter> para, int level); - void run(); - CumulantK15Unified(std::shared_ptr<Parameter> para, int level); + + void run(); }; + +} +} + #endif diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17Unified/CumulantK17Unified.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17Unified/CumulantK17Unified.cu index 40398fe9cbd4856deb61c33d5ca20d5efddbe3b4..989fce0c5e797ef90d644845f6c502bee700f6e1 100644 --- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17Unified/CumulantK17Unified.cu +++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17Unified/CumulantK17Unified.cu @@ -8,15 +8,31 @@ #include <lbm/CumulantChimera.h> +namespace vf +{ +namespace gpu +{ -std::shared_ptr<CumulantK17Unified> CumulantK17Unified::getNewInstance(std::shared_ptr<Parameter> para, int level) + +CumulantK17Unified::CumulantK17Unified(std::shared_ptr<Parameter> para, int level) + : KernelImp(para, level) { - return std::make_shared<CumulantK17Unified>(para, level); 
+#ifndef BUILD_CUDA_LTO + throw std::invalid_argument("To use the CumulantK17Unified kernel, pass -DBUILD_CUDA_LTO=ON to cmake. Requires: CUDA 11.2 & cc 5.0"); +#endif + + myPreProcessorTypes.push_back(InitCompSP27); + + myKernelGroup = BasicKernel; + + this->cudaGrid = CudaGrid(para->getParD(level)->numberofthreads, para->getParD(level)->size_Mat_SP); } + + void CumulantK17Unified::run() { - vf::gpu::GPUKernelParameter kernelParameter{ para->getParD(level)->omega, + GPUKernelParameter kernelParameter{ para->getParD(level)->omega, para->getParD(level)->geoSP, para->getParD(level)->neighborX_SP, para->getParD(level)->neighborY_SP, @@ -26,27 +42,15 @@ void CumulantK17Unified::run() para->getParD(level)->forcing, para->getParD(level)->evenOrOdd }; - auto lambda = [] __device__(vf::lbm::KernelParameter parameter) { - return vf::lbm::cumulantChimera(parameter, vf::lbm::setRelaxationRatesK17); + auto lambda = [] __device__(lbm::KernelParameter parameter) { + return lbm::cumulantChimera(parameter, lbm::setRelaxationRatesK17); }; - vf::gpu::runKernel<<<cudaGrid.grid, cudaGrid.threads>>>(lambda, kernelParameter); + runKernel<<<cudaGrid.grid, cudaGrid.threads>>>(lambda, kernelParameter); getLastCudaError("LB_Kernel_CumulantK17Unified execution failed"); } -CumulantK17Unified::CumulantK17Unified(std::shared_ptr<Parameter> para, int level) -{ -#ifndef BUILD_CUDA_LTO - throw std::invalid_argument("To use the CumulantK17Unified kernel, pass -DBUILD_CUDA_LTO=ON to cmake. Requires: CUDA 11.2 & cc 5.0"); -#endif - - this->para = para; - this->level = level; - - myPreProcessorTypes.push_back(InitCompSP27); - - myKernelGroup = BasicKernel; - this->cudaGrid = vf::gpu::CudaGrid(para->getParD(level)->numberofthreads, para->getParD(level)->size_Mat_SP); +} } diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17Unified/CumulantK17Unified.h b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17Unified/CumulantK17Unified.h index d466b7696a72976e7b17ab43601189d4f75eaf94..af8470b717ad7a98e7a3fcd507976353d9e8bd41 100644 --- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17Unified/CumulantK17Unified.h +++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17Unified/CumulantK17Unified.h @@ -3,13 +3,21 @@ #include "Kernel/KernelImp.h" +namespace vf +{ +namespace gpu +{ + + class CumulantK17Unified : public KernelImp { public: - static std::shared_ptr<CumulantK17Unified> getNewInstance(std::shared_ptr<Parameter> para, int level); - void run(); - CumulantK17Unified(std::shared_ptr<Parameter> para, int level); + + void run(); }; +} +} + #endif diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/FluidFlowCompStrategy.h b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/FluidFlowCompStrategy.h index 5bdf2e9bb0f00e3e2ae3642baa42545079f93c2c..c9a6675bd7a4b82442c55b23bac6f9b49b811938 100644 --- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/FluidFlowCompStrategy.h +++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/FluidFlowCompStrategy.h @@ -15,4 +15,4 @@ private: FluidFlowCompStrategy(); }; -#endif \ No newline at end of file +#endif diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/RunLBMKernel.cuh b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/RunLBMKernel.cuh index 
65305549e6ed3bf41432e70175caef5165aacf8e..b4097851b251b7447f6ce06856d0b9187999a20b 100644 --- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/RunLBMKernel.cuh +++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/RunLBMKernel.cuh @@ -5,7 +5,6 @@ #include <DataTypes.h> #include <cuda_runtime.h> -#include <lbm/Distribution27.h> #include <lbm/KernelParameter.h> #include "Kernel/Utilities/DistributionHelper.cuh" @@ -32,13 +31,13 @@ struct GPUKernelParameter template<typename KernelFunctor> __global__ void runKernel(KernelFunctor kernel, GPUKernelParameter kernelParameter) { - const uint k = vf::gpu::getNodeIndex(); + const uint k = getNodeIndex(); const uint nodeType = kernelParameter.typeOfGridNode[k]; - if (!vf::gpu::isValidFluidNode(k, kernelParameter.size_Mat, nodeType)) + if (!isValidFluidNode(k, kernelParameter.size_Mat, nodeType)) return; - vf::gpu::DistributionWrapper distributionWrapper { + DistributionWrapper distributionWrapper { kernelParameter.distributions, kernelParameter.size_Mat, kernelParameter.isEvenTimestep, diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Utilities/DistributionHelper.cu b/src/gpu/VirtualFluids_GPU/Kernel/Utilities/DistributionHelper.cu index dd68ff05162f65552764ffda8a08d015a5d4a171..bbb01d95410612d36d55f1e0113175a8741b9ade 100644 --- a/src/gpu/VirtualFluids_GPU/Kernel/Utilities/DistributionHelper.cu +++ b/src/gpu/VirtualFluids_GPU/Kernel/Utilities/DistributionHelper.cu @@ -164,19 +164,6 @@ __device__ bool isValidFluidNode(uint k, int size_Mat, uint nodeType) (nodeType == GEO_FLUID || nodeType == GEO_PM_0 || nodeType == GEO_PM_1 || nodeType == GEO_PM_2); } -__device__ void getLevelForce(real fx, real fy, real fz, int level, real *forces) -{ - real fx_t{ 1. }, fy_t{ 1. }, fz_t{ 1. 
}; - for (int i = 0; i < level; i++) { - fx_t *= vf::lbm::constant::c2o1; - fy_t *= vf::lbm::constant::c2o1; - fz_t *= vf::lbm::constant::c2o1; - } - - forces[0] = fx / fx_t; - forces[1] = fy / fy_t; - forces[2] = fz / fz_t; -} } // namespace gpu } // namespace vf diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Utilities/DistributionHelper.cuh b/src/gpu/VirtualFluids_GPU/Kernel/Utilities/DistributionHelper.cuh index 0e9173d9c39629faa65e4fcd14b976de4b283ffd..935030701924233d959fb69b74a7c3087feb0834 100644 --- a/src/gpu/VirtualFluids_GPU/Kernel/Utilities/DistributionHelper.cuh +++ b/src/gpu/VirtualFluids_GPU/Kernel/Utilities/DistributionHelper.cuh @@ -35,7 +35,7 @@ #include "LBM/LB.h" -#include <lbm/Distribution27.h> +#include <lbm/KernelParameter.h> namespace vf { @@ -92,8 +92,6 @@ __device__ unsigned int getNodeIndex(); __device__ bool isValidFluidNode(uint k, int size_Mat, uint nodeType); -__device__ void getLevelForce(real fx, real fy, real fz, int level, real* forces); - } } diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Utilities/KernelFactory/KernelFactoryImp.cpp b/src/gpu/VirtualFluids_GPU/Kernel/Utilities/KernelFactory/KernelFactoryImp.cpp index efad7278d3e9d8b06355d9e8d4f230fa2c3c36b0..5f63df1c9afc17a62a9a47ce82401ebba4453872 100644 --- a/src/gpu/VirtualFluids_GPU/Kernel/Utilities/KernelFactory/KernelFactoryImp.cpp +++ b/src/gpu/VirtualFluids_GPU/Kernel/Utilities/KernelFactory/KernelFactoryImp.cpp @@ -106,8 +106,8 @@ std::shared_ptr<Kernel> KernelFactoryImp::makeKernel(std::shared_ptr<Parameter> if (kernel == "BGKCompSP27") { newKernel = BGKCompSP27::getNewInstance(para, level); // compressible checkStrategy = FluidFlowCompStrategy::getInstance(); // || - } else if (kernel == "BGKUnified") { // \/ - newKernel = BGKUnified::getNewInstance(para, level); + } else if (kernel == "BGKUnified") { // \/ + newKernel = std::make_shared<vf::gpu::BGKUnified>(para, level); checkStrategy = FluidFlowCompStrategy::getInstance(); } else if (kernel == "BGKPlusCompSP27") { newKernel = BGKPlusCompSP27::getNewInstance(para, level); @@ -125,10 +125,10 @@ std::shared_ptr<Kernel> KernelFactoryImp::makeKernel(std::shared_ptr<Parameter> newKernel = CumulantK17Comp::getNewInstance(para, level); checkStrategy = FluidFlowCompStrategy::getInstance(); } else if (kernel == "CumulantK15Unified") { - newKernel = CumulantK15Unified::getNewInstance(para, level); + newKernel = std::make_shared<vf::gpu::CumulantK15Unified>(para, level); checkStrategy = FluidFlowCompStrategy::getInstance(); } else if (kernel == "CumulantK17Unified") { - newKernel = CumulantK17Unified::getNewInstance(para, level); + newKernel = std::make_shared<vf::gpu::CumulantK17Unified>(para, level); checkStrategy = FluidFlowCompStrategy::getInstance(); } else if (kernel == "CumulantK17BulkComp") { newKernel = CumulantK17BulkComp::getNewInstance(para, level); diff --git a/src/gpu/VirtualFluids_GPU/LBM/Simulation.cpp b/src/gpu/VirtualFluids_GPU/LBM/Simulation.cpp index ab904167671f6525d4ea0bd0f459f4b01742b910..7dc916a47cbe08e543cc73d3c6c6cf5f163a16e7 100644 --- a/src/gpu/VirtualFluids_GPU/LBM/Simulation.cpp +++ b/src/gpu/VirtualFluids_GPU/LBM/Simulation.cpp @@ -3,6 +3,8 @@ #include <stdio.h> #include <vector> +#include <helper_timer.h> + #include "LBM/LB.h" #include "Communication/Communicator.h" #include "Communication/ExchangeData27.h" diff --git a/src/lbm/BGK.cpp b/src/lbm/BGK.cpp index 407807cf52dae2526e3782c72565852ce64a34d5..fa3af6777a0492687768dd4945cbf1e9b186f514 100644 --- a/src/lbm/BGK.cpp +++ b/src/lbm/BGK.cpp @@ -1,6 +1,5 @@ #include "BGK.h" 
-#include <cmath> #include <basics/Core/DataTypes.h> #include <basics/Core/RealConstants.h> @@ -8,7 +7,6 @@ #include "constants/NumericConstants.h" #include "constants/D3Q27.h" -#include "Chimera.h" #include "MacroscopicQuantities.h" namespace vf @@ -25,7 +23,6 @@ __host__ __device__ void bgk(KernelParameter parameter) auto& distribution = parameter.distribution; const auto omega = parameter.omega; - //////////////////////////////////////////////////////////////////////////////////// //! - Read distributions: style of reading and writing the distributions from/to //! stored arrays dependent on timestep is based on the esoteric twist algorithm @@ -61,53 +58,39 @@ __host__ __device__ void bgk(KernelParameter parameter) //////////////////////////////////////////////////////////////////////////////////// - //! - Calculate density and velocity using pyramid summation for low round-off errors as in Eq. (J1)-(J3) \ref - //! <a href="https://doi.org/10.1016/j.camwa.2015.05.001"><b>[ M. Geier et al. (2015), DOI:10.1016/j.camwa 2015.05.001 ]</b></a> - //! - const real drho = - ((((mfccc + mfaaa) + (mfaca + mfcac)) + ((mfacc + mfcaa) + (mfaac + mfcca))) + - (((mfbac + mfbca) + (mfbaa + mfbcc)) + ((mfabc + mfcba) + (mfaba + mfcbc)) + ((mfacb + mfcab) + (mfaab + mfccb))) + - ((mfabb + mfcbb) + (mfbab + mfbcb) + (mfbba + mfbbc))) + mfbbb; + //! - Acquire macroscopic quantities + const real drho = getDensity(distribution.f); const real rho = c1o1 + drho; - const real OOrho = c1o1 / rho; - const real vvx = - ((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfcaa - mfacc) + (mfcca - mfaac))) + - (((mfcba - mfabc) + (mfcbc - mfaba)) + ((mfcab - mfacb) + (mfccb - mfaab))) + - (mfcbb - mfabb)) * OOrho; - const real vvy = - ((((mfccc - mfaaa) + (mfaca - mfcac)) + ((mfacc - mfcaa) + (mfcca - mfaac))) + - (((mfbca - mfbac) + (mfbcc - mfbaa)) + ((mfacb - mfcab) + (mfccb - mfaab))) + - (mfbcb - mfbab)) * OOrho; - const real vvz = - ((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfacc - mfcaa) + (mfaac - mfcca))) + - (((mfbac - mfbca) + (mfbcc - mfbaa)) + ((mfabc - mfcba) + (mfcbc - mfaba))) + - (mfbbc - mfbba)) * OOrho; - - - ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - //BGK comp - ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + const real OOrho = constant::c1o1 / (constant::c1o1 + drho); + + const real vvx = getIncompressibleVelocityX1(distribution.f) * OOrho; + const real vvy = getIncompressibleVelocityX2(distribution.f) * OOrho; + const real vvz = getIncompressibleVelocityX3(distribution.f) * OOrho; + + + //////////////////////////////////////////////////////////////////////////////////// + //! 
- BGK computation const real cusq = c3o2*(vvx*vvx + vvy*vvy + vvz*vvz); - ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - mfbbb = mfbbb *(c1o1 + (-omega)) - (-omega)* c8o27* (drho - rho * cusq); - mfcbb = mfcbb *(c1o1 + (-omega)) - (-omega)* c2o27* (drho + rho * (c3o1*(vvx)+c9o2*(vvx)*(vvx)-cusq)); - mfabb = mfabb *(c1o1 + (-omega)) - (-omega)* c2o27* (drho + rho * (c3o1*(-vvx) + c9o2*(-vvx)*(-vvx) - cusq)); - mfbcb = mfbcb *(c1o1 + (-omega)) - (-omega)* c2o27* (drho + rho * (c3o1*(vvy)+c9o2*(vvy)*(vvy)-cusq)); - mfbab = mfbab *(c1o1 + (-omega)) - (-omega)* c2o27* (drho + rho * (c3o1*(-vvy) + c9o2*(-vvy)*(-vvy) - cusq)); - mfbbc = mfbbc *(c1o1 + (-omega)) - (-omega)* c2o27* (drho + rho * (c3o1*(vvz)+c9o2*(vvz)*(vvz)-cusq)); - mfbba = mfbba *(c1o1 + (-omega)) - (-omega)* c2o27* (drho + rho * (c3o1*(-vvz) + c9o2*(-vvz)*(-vvz) - cusq)); - mfccb = mfccb *(c1o1 + (-omega)) - (-omega)* c1o54* (drho + rho * (c3o1*(vvx + vvy) + c9o2*(vvx + vvy)*(vvx + vvy) - cusq)); - mfaab = mfaab *(c1o1 + (-omega)) - (-omega)* c1o54* (drho + rho * (c3o1*(-vvx - vvy) + c9o2*(-vvx - vvy)*(-vvx - vvy) - cusq)); - mfcab = mfcab *(c1o1 + (-omega)) - (-omega)* c1o54* (drho + rho * (c3o1*(vvx - vvy) + c9o2*(vvx - vvy)*(vvx - vvy) - cusq)); - mfacb = mfacb *(c1o1 + (-omega)) - (-omega)* c1o54* (drho + rho * (c3o1*(-vvx + vvy) + c9o2*(-vvx + vvy)*(-vvx + vvy) - cusq)); - mfcbc = mfcbc *(c1o1 + (-omega)) - (-omega)* c1o54* (drho + rho * (c3o1*(vvx + vvz) + c9o2*(vvx + vvz)*(vvx + vvz) - cusq)); - mfaba = mfaba *(c1o1 + (-omega)) - (-omega)* c1o54* (drho + rho * (c3o1*(-vvx - vvz) + c9o2*(-vvx - vvz)*(-vvx - vvz) - cusq)); - mfcba = mfcba *(c1o1 + (-omega)) - (-omega)* c1o54* (drho + rho * (c3o1*(vvx - vvz) + c9o2*(vvx - vvz)*(vvx - vvz) - cusq)); - mfabc = mfabc *(c1o1 + (-omega)) - (-omega)* c1o54* (drho + rho * (c3o1*(-vvx + vvz) + c9o2*(-vvx + vvz)*(-vvx + vvz) - cusq)); - mfbcc = mfbcc *(c1o1 + (-omega)) - (-omega)* c1o54* (drho + rho * (c3o1*(vvy + vvz) + c9o2*(vvy + vvz)*(vvy + vvz) - cusq)); - mfbaa = mfbaa *(c1o1 + (-omega)) - (-omega)* c1o54* (drho + rho * (c3o1*(-vvy - vvz) + c9o2*(-vvy - vvz)*(-vvy - vvz) - cusq)); - mfbca = mfbca *(c1o1 + (-omega)) - (-omega)* c1o54* (drho + rho * (c3o1*(vvy - vvz) + c9o2*(vvy - vvz)*(vvy - vvz) - cusq)); - mfbac = mfbac *(c1o1 + (-omega)) - (-omega)* c1o54* (drho + rho * (c3o1*(-vvy + vvz) + c9o2*(-vvy + vvz)*(-vvy + vvz) - cusq)); + + mfbbb = mfbbb *(c1o1 + (-omega)) - (-omega)* c8o27* (drho - rho * cusq); + mfcbb = mfcbb *(c1o1 + (-omega)) - (-omega)* c2o27* (drho + rho * (c3o1*(vvx)+c9o2*(vvx)*(vvx)-cusq)); + mfabb = mfabb *(c1o1 + (-omega)) - (-omega)* c2o27* (drho + rho * (c3o1*(-vvx) + c9o2*(-vvx)*(-vvx) - cusq)); + mfbcb = mfbcb *(c1o1 + (-omega)) - (-omega)* c2o27* (drho + rho * (c3o1*(vvy)+c9o2*(vvy)*(vvy)-cusq)); + mfbab = mfbab *(c1o1 + (-omega)) - (-omega)* c2o27* (drho + rho * (c3o1*(-vvy) + c9o2*(-vvy)*(-vvy) - cusq)); + mfbbc = mfbbc *(c1o1 + (-omega)) - (-omega)* c2o27* (drho + rho * (c3o1*(vvz)+c9o2*(vvz)*(vvz)-cusq)); + mfbba = mfbba *(c1o1 + (-omega)) - (-omega)* c2o27* (drho + rho * (c3o1*(-vvz) + c9o2*(-vvz)*(-vvz) - cusq)); + mfccb = mfccb *(c1o1 + (-omega)) - (-omega)* c1o54* (drho + rho * (c3o1*(vvx + vvy) + c9o2*(vvx + vvy)*(vvx + vvy) - cusq)); + mfaab = mfaab *(c1o1 + (-omega)) - (-omega)* c1o54* (drho + rho * (c3o1*(-vvx - vvy) + c9o2*(-vvx - vvy)*(-vvx - vvy) - cusq)); + mfcab = mfcab *(c1o1 + (-omega)) - (-omega)* c1o54* (drho + rho * (c3o1*(vvx - 
vvy) + c9o2*(vvx - vvy)*(vvx - vvy) - cusq)); + mfacb = mfacb *(c1o1 + (-omega)) - (-omega)* c1o54* (drho + rho * (c3o1*(-vvx + vvy) + c9o2*(-vvx + vvy)*(-vvx + vvy) - cusq)); + mfcbc = mfcbc *(c1o1 + (-omega)) - (-omega)* c1o54* (drho + rho * (c3o1*(vvx + vvz) + c9o2*(vvx + vvz)*(vvx + vvz) - cusq)); + mfaba = mfaba *(c1o1 + (-omega)) - (-omega)* c1o54* (drho + rho * (c3o1*(-vvx - vvz) + c9o2*(-vvx - vvz)*(-vvx - vvz) - cusq)); + mfcba = mfcba *(c1o1 + (-omega)) - (-omega)* c1o54* (drho + rho * (c3o1*(vvx - vvz) + c9o2*(vvx - vvz)*(vvx - vvz) - cusq)); + mfabc = mfabc *(c1o1 + (-omega)) - (-omega)* c1o54* (drho + rho * (c3o1*(-vvx + vvz) + c9o2*(-vvx + vvz)*(-vvx + vvz) - cusq)); + mfbcc = mfbcc *(c1o1 + (-omega)) - (-omega)* c1o54* (drho + rho * (c3o1*(vvy + vvz) + c9o2*(vvy + vvz)*(vvy + vvz) - cusq)); + mfbaa = mfbaa *(c1o1 + (-omega)) - (-omega)* c1o54* (drho + rho * (c3o1*(-vvy - vvz) + c9o2*(-vvy - vvz)*(-vvy - vvz) - cusq)); + mfbca = mfbca *(c1o1 + (-omega)) - (-omega)* c1o54* (drho + rho * (c3o1*(vvy - vvz) + c9o2*(vvy - vvz)*(vvy - vvz) - cusq)); + mfbac = mfbac *(c1o1 + (-omega)) - (-omega)* c1o54* (drho + rho * (c3o1*(-vvy + vvz) + c9o2*(-vvy + vvz)*(-vvy + vvz) - cusq)); mfccc = mfccc *(c1o1 + (-omega)) - (-omega)* c1o216*(drho + rho * (c3o1*(vvx + vvy + vvz) + c9o2*(vvx + vvy + vvz)*(vvx + vvy + vvz) - cusq)); mfaaa = mfaaa *(c1o1 + (-omega)) - (-omega)* c1o216*(drho + rho * (c3o1*(-vvx - vvy - vvz) + c9o2*(-vvx - vvy - vvz)*(-vvx - vvy - vvz) - cusq)); mfcca = mfcca *(c1o1 + (-omega)) - (-omega)* c1o216*(drho + rho * (c3o1*(vvx + vvy - vvz) + c9o2*(vvx + vvy - vvz)*(vvx + vvy - vvz) - cusq)); @@ -122,33 +105,33 @@ __host__ __device__ void bgk(KernelParameter parameter) //! stored arrays dependent on timestep is based on the esoteric twist algorithm //! <a href="https://doi.org/10.3390/computation5020019"><b>[ M. Geier et al. (2017), DOI:10.3390/computation5020019 ]</b></a> //! 
- distribution.f[vf::lbm::dir::MZZ] = mfcbb; - distribution.f[vf::lbm::dir::PZZ] = mfabb; - distribution.f[vf::lbm::dir::ZMZ] = mfbcb; - distribution.f[vf::lbm::dir::ZPZ] = mfbab; - distribution.f[vf::lbm::dir::ZZM] = mfbbc; - distribution.f[vf::lbm::dir::ZZP] = mfbba; - distribution.f[vf::lbm::dir::MMZ] = mfccb; - distribution.f[vf::lbm::dir::PPZ] = mfaab; - distribution.f[vf::lbm::dir::MPZ] = mfcab; - distribution.f[vf::lbm::dir::PMZ] = mfacb; - distribution.f[vf::lbm::dir::MZM] = mfcbc; - distribution.f[vf::lbm::dir::PZP] = mfaba; - distribution.f[vf::lbm::dir::MZP] = mfcba; - distribution.f[vf::lbm::dir::PZM] = mfabc; - distribution.f[vf::lbm::dir::ZMM] = mfbcc; - distribution.f[vf::lbm::dir::ZPP] = mfbaa; - distribution.f[vf::lbm::dir::ZMP] = mfbca; - distribution.f[vf::lbm::dir::ZPM] = mfbac; - distribution.f[vf::lbm::dir::MMM] = mfccc; - distribution.f[vf::lbm::dir::PMM] = mfacc; - distribution.f[vf::lbm::dir::MPM] = mfcac; - distribution.f[vf::lbm::dir::PPM] = mfaac; - distribution.f[vf::lbm::dir::MMP] = mfcca; - distribution.f[vf::lbm::dir::PMP] = mfaca; - distribution.f[vf::lbm::dir::MPP] = mfcaa; - distribution.f[vf::lbm::dir::PPP] = mfaaa; - distribution.f[vf::lbm::dir::ZZZ] = mfbbb; + distribution.f[dir::MZZ] = mfcbb; + distribution.f[dir::PZZ] = mfabb; + distribution.f[dir::ZMZ] = mfbcb; + distribution.f[dir::ZPZ] = mfbab; + distribution.f[dir::ZZM] = mfbbc; + distribution.f[dir::ZZP] = mfbba; + distribution.f[dir::MMZ] = mfccb; + distribution.f[dir::PPZ] = mfaab; + distribution.f[dir::MPZ] = mfcab; + distribution.f[dir::PMZ] = mfacb; + distribution.f[dir::MZM] = mfcbc; + distribution.f[dir::PZP] = mfaba; + distribution.f[dir::MZP] = mfcba; + distribution.f[dir::PZM] = mfabc; + distribution.f[dir::ZMM] = mfbcc; + distribution.f[dir::ZPP] = mfbaa; + distribution.f[dir::ZMP] = mfbca; + distribution.f[dir::ZPM] = mfbac; + distribution.f[dir::MMM] = mfccc; + distribution.f[dir::PMM] = mfacc; + distribution.f[dir::MPM] = mfcac; + distribution.f[dir::PPM] = mfaac; + distribution.f[dir::MMP] = mfcca; + distribution.f[dir::PMP] = mfaca; + distribution.f[dir::MPP] = mfcaa; + distribution.f[dir::PPP] = mfaaa; + distribution.f[dir::ZZZ] = mfbbb; } diff --git a/src/lbm/CumulantChimera.cpp b/src/lbm/CumulantChimera.cpp index 77133d1b698767b9569c91ac1b7d68a68dfeb58b..65dc9b1f82f409447fda54fc0bc5f460533aad1b 100644 --- a/src/lbm/CumulantChimera.cpp +++ b/src/lbm/CumulantChimera.cpp @@ -110,48 +110,24 @@ __host__ __device__ void cumulantChimera(KernelParameter parameter, RelaxationRa real mfaaa = distribution.f[dir::MMM]; real mfbbb = distribution.f[dir::ZZZ]; - //////////////////////////////////////////////////////////////////////////////////// - //! - Calculate density and velocity using pyramid summation for low round-off errors as in Eq. (J1)-(J3) \ref - //! <a href="https://doi.org/10.1016/j.camwa.2015.05.001"><b>[ M. Geier et al. (2015), DOI:10.1016/j.camwa 2015.05.001 ]</b></a> - //! 
-    const real drho =
-        ((((mfccc + mfaaa) + (mfaca + mfcac)) + ((mfacc + mfcaa) + (mfaac + mfcca))) +
-         (((mfbac + mfbca) + (mfbaa + mfbcc)) + ((mfabc + mfcba) + (mfaba + mfcbc)) + ((mfacb + mfcab) + (mfaab + mfccb))) +
-         ((mfabb + mfcbb) + (mfbab + mfbcb) + (mfbba + mfbbc))) + mfbbb;
-    const real rho = c1o1 + drho;
-    const real OOrho = c1o1 / rho;
-    real vvx =
-        ((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfcaa - mfacc) + (mfcca - mfaac))) +
-         (((mfcba - mfabc) + (mfcbc - mfaba)) + ((mfcab - mfacb) + (mfccb - mfaab))) +
-         (mfcbb - mfabb)) * OOrho;
-    real vvy =
-        ((((mfccc - mfaaa) + (mfaca - mfcac)) + ((mfacc - mfcaa) + (mfcca - mfaac))) +
-         (((mfbca - mfbac) + (mfbcc - mfbaa)) + ((mfacb - mfcab) + (mfccb - mfaab))) +
-         (mfbcb - mfbab)) * OOrho;
-    real vvz =
-        ((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfacc - mfcaa) + (mfaac - mfcca))) +
-         (((mfbac - mfbca) + (mfbcc - mfbaa)) + ((mfabc - mfcba) + (mfcbc - mfaba))) +
-         (mfbbc - mfbba)) * OOrho;
+
+    const real drho = getDensity(distribution.f);
+    const real OOrho = c1o1 / (c1o1 + drho);
+
     ////////////////////////////////////////////////////////////////////////////////////
     //! - Add half of the acceleration (body force) to the velocity as in Eq. (42) \ref
     //! <a href="https://doi.org/10.1016/j.camwa.2015.05.001"><b>[ M. Geier et al. (2015), DOI:10.1016/j.camwa 2015.05.001 ]</b></a>
     //!
-    vvx += forces[0] * c1o2;
-    vvy += forces[1] * c1o2;
-    vvz += forces[2] * c1o2;
+    const real vvx = getIncompressibleVelocityX1(distribution.f) * OOrho + forces[0] * c1o2;
+    const real vvy = getIncompressibleVelocityX2(distribution.f) * OOrho + forces[1] * c1o2;
+    const real vvz = getIncompressibleVelocityX3(distribution.f) * OOrho + forces[2] * c1o2;
+
     ////////////////////////////////////////////////////////////////////////////////////
     // calculate the square of velocities for this lattice node
-    real vx2 = vvx*vvx;
-    real vy2 = vvy*vvy;
-    real vz2 = vvz*vvz;
-    ////////////////////////////////////////////////////////////////////////////////////
-    //! - Set relaxation limiters for third order cumulants to default value \f$ \lambda=0.001 \f$ according to section 6 in \ref
-    //! <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017), DOI:10.1016/j.jcp.2017.05 040 ]</b></a>
-    //!
-    real wadjust;
-    real qudricLimitP = c1o100;
-    real qudricLimitM = c1o100;
-    real qudricLimitD = c1o100;
+    const real vx2 = vvx*vvx;
+    const real vy2 = vvy*vvy;
+    const real vz2 = vvz*vvz;
+
     ////////////////////////////////////////////////////////////////////////////////////
     //! - Chimera transform from well conditioned distributions to central moments as defined in Appendix J in \ref
     //! <a href="https://doi.org/10.1016/j.camwa.2015.05.001"><b>[ M. Geier et al. (2015), DOI:10.1016/j.camwa 2015.05.001 ]</b></a>
@@ -209,8 +185,8 @@ __host__ __device__ void cumulantChimera(KernelParameter parameter, RelaxationRa
     //! <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017), DOI:10.1016/j.jcp.2017.05 040 ]</b></a>
     //! with simplifications assuming \f$ \omega_2 = 1.0 \f$ (modify for different bulk viscosity).
     //!
- real A = (c4o1 + c2o1*omega - c3o1*omega*omega) / (c2o1 - c7o1*omega + c5o1*omega*omega); - real B = (c4o1 + c28o1*omega - c14o1*omega*omega) / (c6o1 - c21o1*omega + c15o1*omega*omega); + const real A = (c4o1 + c2o1*omega - c3o1*omega*omega) / (c2o1 - c7o1*omega + c5o1*omega*omega); + const real B = (c4o1 + c28o1*omega - c14o1*omega*omega) / (c6o1 - c21o1*omega + c15o1*omega*omega); //////////////////////////////////////////////////////////////////////////////////// //! - Compute cumulants from central moments according to Eq. (20)-(23) in //! <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017), DOI:10.1016/j.jcp.2017.05 040 ]</b></a> @@ -267,12 +243,12 @@ __host__ __device__ void cumulantChimera(KernelParameter parameter, RelaxationRa //! <a href="https://doi.org/10.1016/j.camwa.2015.05.001"><b>[ M. Geier et al. (2015), DOI:10.1016/j.camwa 2015.05.001 ]</b></a> //! Note that the division by rho is omitted here as we need rho times the gradients later. //! - real Dxy = -c3o1*omega*mfbba; - real Dxz = -c3o1*omega*mfbab; - real Dyz = -c3o1*omega*mfabb; - real dxux = c1o2 * (-omega) *(mxxMyy + mxxMzz) + c1o2 * OxxPyyPzz * (mfaaa - mxxPyyPzz); - real dyuy = dxux + omega * c3o2 * mxxMyy; - real dzuz = dxux + omega * c3o2 * mxxMzz; + const real Dxy = -c3o1*omega*mfbba; + const real Dxz = -c3o1*omega*mfbab; + const real Dyz = -c3o1*omega*mfabb; + const real dxux = c1o2 * (-omega) *(mxxMyy + mxxMzz) + c1o2 * OxxPyyPzz * (mfaaa - mxxPyyPzz); + const real dyuy = dxux + omega * c3o2 * mxxMyy; + const real dzuz = dxux + omega * c3o2 * mxxMzz; //////////////////////////////////////////////////////////// //! - Relaxation of second order cumulants with correction terms according to Eq. (33)-(35) in //! <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017), DOI:10.1016/j.jcp.2017.05 040 ]</b></a> @@ -293,10 +269,16 @@ __host__ __device__ void cumulantChimera(KernelParameter parameter, RelaxationRa //relax ////////////////////////////////////////////////////////////////////////// // incl. limiter + //! Set relaxation limiters for third order cumulants to default value \f$ \lambda=0.001 \f$ according to section 6 in \ref //! - Relaxation of third order cumulants including limiter according to Eq. (116)-(123) //! <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017), DOI:10.1016/j.jcp.2017.05 040 ]</b></a> //! - wadjust = Oxyz + (c1o1 - Oxyz)*abs_internal(mfbbb) / (abs_internal(mfbbb) + qudricLimitD); + + const real qudricLimitP = c1o100; + const real qudricLimitM = c1o100; + const real qudricLimitD = c1o100; + + real wadjust = Oxyz + (c1o1 - Oxyz)*abs_internal(mfbbb) / (abs_internal(mfbbb) + qudricLimitD); mfbbb += wadjust * (-mfbbb); wadjust = OxyyPxzz + (c1o1 - OxyyPxzz)*abs_internal(mxxyPyzz) / (abs_internal(mxxyPyzz) + qudricLimitP); mxxyPyzz += wadjust * (-mxxyPyzz); @@ -436,33 +418,33 @@ __host__ __device__ void cumulantChimera(KernelParameter parameter, RelaxationRa //! stored arrays dependent on timestep is based on the esoteric twist algorithm //! <a href="https://doi.org/10.3390/computation5020019"><b>[ M. Geier et al. (2017), DOI:10.3390/computation5020019 ]</b></a> //! 
- distribution.f[vf::lbm::dir::MZZ] = mfcbb; - distribution.f[vf::lbm::dir::PZZ] = mfabb; - distribution.f[vf::lbm::dir::ZMZ] = mfbcb; - distribution.f[vf::lbm::dir::ZPZ] = mfbab; - distribution.f[vf::lbm::dir::ZZM] = mfbbc; - distribution.f[vf::lbm::dir::ZZP] = mfbba; - distribution.f[vf::lbm::dir::MMZ] = mfccb; - distribution.f[vf::lbm::dir::PPZ] = mfaab; - distribution.f[vf::lbm::dir::MPZ] = mfcab; - distribution.f[vf::lbm::dir::PMZ] = mfacb; - distribution.f[vf::lbm::dir::MZM] = mfcbc; - distribution.f[vf::lbm::dir::PZP] = mfaba; - distribution.f[vf::lbm::dir::MZP] = mfcba; - distribution.f[vf::lbm::dir::PZM] = mfabc; - distribution.f[vf::lbm::dir::ZMM] = mfbcc; - distribution.f[vf::lbm::dir::ZPP] = mfbaa; - distribution.f[vf::lbm::dir::ZMP] = mfbca; - distribution.f[vf::lbm::dir::ZPM] = mfbac; - distribution.f[vf::lbm::dir::MMM] = mfccc; - distribution.f[vf::lbm::dir::PMM] = mfacc; - distribution.f[vf::lbm::dir::MPM] = mfcac; - distribution.f[vf::lbm::dir::PPM] = mfaac; - distribution.f[vf::lbm::dir::MMP] = mfcca; - distribution.f[vf::lbm::dir::PMP] = mfaca; - distribution.f[vf::lbm::dir::MPP] = mfcaa; - distribution.f[vf::lbm::dir::PPP] = mfaaa; - distribution.f[vf::lbm::dir::ZZZ] = mfbbb; + distribution.f[dir::MZZ] = mfcbb; + distribution.f[dir::PZZ] = mfabb; + distribution.f[dir::ZMZ] = mfbcb; + distribution.f[dir::ZPZ] = mfbab; + distribution.f[dir::ZZM] = mfbbc; + distribution.f[dir::ZZP] = mfbba; + distribution.f[dir::MMZ] = mfccb; + distribution.f[dir::PPZ] = mfaab; + distribution.f[dir::MPZ] = mfcab; + distribution.f[dir::PMZ] = mfacb; + distribution.f[dir::MZM] = mfcbc; + distribution.f[dir::PZP] = mfaba; + distribution.f[dir::MZP] = mfcba; + distribution.f[dir::PZM] = mfabc; + distribution.f[dir::ZMM] = mfbcc; + distribution.f[dir::ZPP] = mfbaa; + distribution.f[dir::ZMP] = mfbca; + distribution.f[dir::ZPM] = mfbac; + distribution.f[dir::MMM] = mfccc; + distribution.f[dir::PMM] = mfacc; + distribution.f[dir::MPM] = mfcac; + distribution.f[dir::PPM] = mfaac; + distribution.f[dir::MMP] = mfcca; + distribution.f[dir::PMP] = mfaca; + distribution.f[dir::MPP] = mfcaa; + distribution.f[dir::PPP] = mfaaa; + distribution.f[dir::ZZZ] = mfbbb; } diff --git a/src/lbm/Distribution27.h b/src/lbm/Distribution27.h deleted file mode 100644 index cebd2468bf111439d6bf9820ff39b883f4289a1e..0000000000000000000000000000000000000000 --- a/src/lbm/Distribution27.h +++ /dev/null @@ -1,34 +0,0 @@ -#ifndef LBM_DISTRIBUTION_27_H -#define LBM_DISTRIBUTION_27_H - -#ifndef __host__ -#define __host__ -#endif -#ifndef __device__ -#define __device__ -#endif - - -#include <basics/Core/DataTypes.h> - -namespace vf -{ -namespace lbm -{ - - -struct Distribution27 -{ - real f[27]; - - __host__ __device__ real getDensity_() const; -}; - - -__host__ __device__ real abs_internal(real value); - - -} -} - -#endif diff --git a/src/lbm/Distribution27.cpp b/src/lbm/KernelParameter.cpp similarity index 91% rename from src/lbm/Distribution27.cpp rename to src/lbm/KernelParameter.cpp index a84dc9c7fb355d6d642fb13ec0486bad17f7e20e..15a0f9c445fa4fc7b1fba4b0482da29068b72951 100644 --- a/src/lbm/Distribution27.cpp +++ b/src/lbm/KernelParameter.cpp @@ -1,4 +1,4 @@ -#include "Distribution27.h" +#include "KernelParameter.h" #include "MacroscopicQuantities.h" diff --git a/src/lbm/KernelParameter.h b/src/lbm/KernelParameter.h index b60e43439dda3f2c592b67b6f9491d010891a32c..95226628110637f3794c8a1f7e6f6c1f6dda937b 100644 --- a/src/lbm/KernelParameter.h +++ b/src/lbm/KernelParameter.h @@ -1,5 +1,5 @@ -#ifndef 
LBM_CUMULANT_CHIMERA_PARAMETER_H -#define LBM_CUMULANT_CHIMERA_PARAMETER_H +#ifndef LBM_KERNEL_PARAMETER_H +#define LBM_KERNEL_PARAMETER_H #ifndef __host__ #define __host__ @@ -10,13 +10,22 @@ #include <basics/Core/DataTypes.h> -#include "Distribution27.h" namespace vf { namespace lbm { +struct Distribution27 +{ + real f[27]; + + __host__ __device__ real getDensity_() const; +}; + + +__host__ __device__ real abs_internal(real value); + struct KernelParameter { @@ -27,6 +36,7 @@ struct KernelParameter + } } diff --git a/src/lbm/MacroscopicQuantities.h b/src/lbm/MacroscopicQuantities.h index 1a440b5322ca2b4ac63c68e35b2442551b5003f0..c37791294ff5b4edad21795f4ce0a32a18c5d236 100644 --- a/src/lbm/MacroscopicQuantities.h +++ b/src/lbm/MacroscopicQuantities.h @@ -17,7 +17,11 @@ namespace vf { namespace lbm { - + +//////////////////////////////////////////////////////////////////////////////////// +//! - Calculate density and velocity using pyramid summation for low round-off errors as in Eq. (J1)-(J3) \ref +//! <a href="https://doi.org/10.1016/j.camwa.2015.05.001"><b>[ M. Geier et al. (2015), DOI:10.1016/j.camwa 2015.05.001 ]</b></a> +//! inline __host__ __device__ real getDensity(const real *const &f /*[27]*/) { diff --git a/src/lbm/cuda/CMakeLists.txt b/src/lbm/cuda/CMakeLists.txt index 7b1741478550f3d297fe1a8254de63c902888125..be16988a480650cbab416652655af9766bcf8ec7 100644 --- a/src/lbm/cuda/CMakeLists.txt +++ b/src/lbm/cuda/CMakeLists.txt @@ -6,6 +6,7 @@ vf_add_library(NAME lbmCuda BUILDTYPE static PUBLIC_LINK basics FOLDER ../../lbm set_target_properties(lbmCuda PROPERTIES CUDA_SEPARABLE_COMPILATION ON) -set_source_files_properties(../Distribution27.cpp PROPERTIES LANGUAGE CUDA) +set_source_files_properties(../KernelParameter.cpp PROPERTIES LANGUAGE CUDA) + set_source_files_properties(../CumulantChimera.cpp PROPERTIES LANGUAGE CUDA) set_source_files_properties(../BGK.cpp PROPERTIES LANGUAGE CUDA) \ No newline at end of file
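
Note (not part of the patch): with the static getNewInstance() factories removed, call sites construct the unified kernels directly via std::make_shared, as KernelFactoryImp.cpp now does. Below is a minimal sketch of the new construction path; the helper name makeBgkUnifiedKernel is illustrative only, the include paths are taken as they appear in this diff, and para is assumed to be a fully initialized Parameter object for the given level.

    // Sketch only, under the assumptions stated above.
    #include <memory>
    #include "Kernel/Kernels/BasicKernels/FluidFlow/Compressible/BGKUnified/BGKUnified.h"
    #include "Kernel/Kernels/BasicKernels/FluidFlow/Compressible/FluidFlowCompStrategy.h"

    std::shared_ptr<Kernel> makeBgkUnifiedKernel(std::shared_ptr<Parameter> para, int level)
    {
        // Kernels now take (para, level) in their constructor and forward to the new
        // KernelImp(para, level) base constructor, which stores both members.
        auto kernel = std::make_shared<vf::gpu::BGKUnified>(para, level);
        // The parameter-check strategy is still attached separately, as in the factory.
        kernel->setCheckParameterStrategy(FluidFlowCompStrategy::getInstance());
        return kernel;
    }

    // Typical use: makeBgkUnifiedKernel(para, level)->run();

Keeping construction in the constructor (rather than a static factory) lets the base class own para/level and removes the duplicated assignments the old per-kernel constructors carried.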