diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index a2e90d98acc29810ef98e96c7903b7363fda4233..2c6c35332b372379891de0c1ecc14640b48cbca8 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -459,7 +459,9 @@ gcov_gcc_9: - coverage/ reports: - cobertura: coverage/coverage.xml + coverage_report: + coverage_format: cobertura + path: coverage/coverage.xml cache: key: "$CI_JOB_NAME-$CI_COMMIT_REF_SLUG" diff --git a/Python/boundary_layer/__init__.py b/Python/boundary_layer/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/Python/boundary_layer/boundary_layer.py b/Python/boundary_layer/boundary_layer.py new file mode 100644 index 0000000000000000000000000000000000000000..cf941a9418e5c3ec5d94864f119de20401601622 --- /dev/null +++ b/Python/boundary_layer/boundary_layer.py @@ -0,0 +1,108 @@ +#%% +import numpy as np +from pathlib import Path +from mpi4py import MPI +from pyfluids import basics, gpu, logger +#%% +reference_diameter = 126 + +length = np.array([30,8,8])*reference_diameter +viscosity = 1.56e-5 +velocity = 9 +mach = 0.1 +nodes_per_diameter = 32 + +sim_name = "BoundaryLayer" +config_file = Path(__file__).parent/Path("config.txt") +output_path = Path(__file__).parent/Path("output") +output_path.mkdir(exist_ok=True) +timeStepOut = 500 +t_end = 50 + +#%% +logger.Logger.initialize_logger() +basics.logger.Logger.add_stdout() +basics.logger.Logger.set_debug_level(basics.logger.Level.INFO_LOW) +basics.logger.Logger.time_stamp(basics.logger.TimeStamp.ENABLE) +basics.logger.Logger.enable_printed_rank_numbers(True) +#%% +grid_builder = gpu.MultipleGridBuilder.make_shared() +dx = reference_diameter/nodes_per_diameter + +grid_builder.add_coarse_grid(0.0, 0.0, 0.0, *length, dx) +grid_builder.set_periodic_boundary_condition(False, False, False) +grid_builder.build_grids(basics.LbmOrGks.LBM, False) +# %% +comm = gpu.Communicator.get_instance() +#%% +config = basics.ConfigurationFile() 
+config.load(str(config_file)) +#%% +para = gpu.Parameter(config, comm.get_number_of_process(), comm.get_pid()) + +dt = dx * mach / (np.sqrt(3) * velocity) +velocity_lb = velocity * dt / dx # LB units +viscosity_lb = viscosity * dt / (dx * dx) # LB units + +#%% +para.set_devices([0]) +para.set_output_prefix(sim_name) +para.set_output_path(str(output_path)) +para.set_f_name(para.get_output_path() + "/" + para.get_output_prefix()) +para.set_print_files(True) +para.set_max_level(1) +#%% +para.set_velocity(velocity_lb) +para.set_viscosity(viscosity_lb) +para.set_velocity_ratio(dx/dt) +para.set_main_kernel("CumulantK17CompChim") + +def init_func(coord_x, coord_y, coord_z): + return [0.0, velocity_lb, 0.0, 0.0] + +para.set_initial_condition(init_func) +para.set_t_out(timeStepOut) +para.set_t_end(int(t_end/dt)) +para.set_is_body_force(True) + +#%% +grid_builder.set_velocity_boundary_condition(gpu.SideType.MX, velocity_lb, 0.0, 0.0) +grid_builder.set_velocity_boundary_condition(gpu.SideType.PX, velocity_lb, 0.0, 0.0) + +grid_builder.set_velocity_boundary_condition(gpu.SideType.MY, velocity_lb, 0.0, 0.0) +grid_builder.set_velocity_boundary_condition(gpu.SideType.PY, velocity_lb, 0.0, 0.0) + +grid_builder.set_velocity_boundary_condition(gpu.SideType.MZ, velocity_lb, 0.0, 0.0) +grid_builder.set_velocity_boundary_condition(gpu.SideType.PZ, velocity_lb, 0.0, 0.0) + +#%% +cuda_memory_manager = gpu.CudaMemoryManager.make(para) +grid_generator = gpu.GridProvider.make_grid_generator(grid_builder, para, cuda_memory_manager) +#%% +turb_pos = np.array([3,3,3])*reference_diameter +epsilon = 5 +density = 1.225 +level = 0 +n_blades = 3 +n_blade_nodes = 32 +alm = gpu.ActuatorLine(n_blades, density, n_blade_nodes, epsilon, *turb_pos, reference_diameter, level, dt, dx) +para.add_actuator(alm) +#%% +point_probe = gpu.probes.PointProbe("pointProbe", str(output_path), 100, 500, 100) +point_probe.add_probe_points_from_list(np.array([1,2,5])*reference_diameter, 
np.array([3,3,3])*reference_diameter, np.array([3,3,3])*reference_diameter) +point_probe.add_post_processing_variable(gpu.probes.PostProcessingVariable.Means) + +para.add_probe(point_probe) + +plane_probe = gpu.probes.PlaneProbe("planeProbe", str(output_path), 100, 500, 100) +plane_probe.set_probe_plane(5*reference_diameter, 0, 0, dx, length[1], length[2]) +para.add_probe(plane_probe) +#%% +sim = gpu.Simulation(comm) +kernel_factory = gpu.KernelFactory.get_instance() +sim.set_factories(kernel_factory, gpu.PreProcessorFactory.get_instance()) +sim.init(para, grid_generator, gpu.FileWriter(), cuda_memory_manager) +#%% +sim.run() +sim.free() +MPI.Finalize() \ No newline at end of file diff --git a/Python/boundary_layer/config.txt b/Python/boundary_layer/config.txt new file mode 100644 index 0000000000000000000000000000000000000000..e4c778c4cc048f54c0a32310e6bf4a7343a263fa --- /dev/null +++ b/Python/boundary_layer/config.txt @@ -0,0 +1,2 @@ +Path = . +GridPath = . diff --git a/apps/gpu/LBM/ActuatorLine/ActuatorLine.cpp b/apps/gpu/LBM/ActuatorLine/ActuatorLine.cpp index 6f22d023925ad68bfb9bd5b14f845813cad71105..6be64950710c53b3c7931180a9beb1368a615fe3 100644 --- a/apps/gpu/LBM/ActuatorLine/ActuatorLine.cpp +++ b/apps/gpu/LBM/ActuatorLine/ActuatorLine.cpp @@ -191,7 +191,7 @@ void multipleLevel(const std::string& configPath) SPtr<ActuatorLine> actuator_line =SPtr<ActuatorLine>( new ActuatorLine(nBlades, density, nBladeNodes, epsilon, turbPos[0], turbPos[1], turbPos[2], reference_diameter, level, dt, dx) ); para->addActuator( actuator_line ); - SPtr<PointProbe> pointProbe = SPtr<PointProbe>( new PointProbe("pointProbe", para->getOutputPath(), 100, 500, 100) ); + SPtr<PointProbe> pointProbe = SPtr<PointProbe>( new PointProbe("pointProbe", para->getOutputPath(), 100, 1, 500, 100) ); std::vector<real> probeCoordsX = {reference_diameter,2*reference_diameter,5*reference_diameter}; std::vector<real> probeCoordsY = {3*reference_diameter,3*reference_diameter,3*reference_diameter}; 
std::vector<real> probeCoordsZ = {3*reference_diameter,3*reference_diameter,3*reference_diameter}; diff --git a/apps/gpu/LBM/BoundaryLayer/BoundaryLayer.cpp b/apps/gpu/LBM/BoundaryLayer/BoundaryLayer.cpp new file mode 100644 index 0000000000000000000000000000000000000000..c440bd14cf46ca8dae8013b5c0a480109924f7c4 --- /dev/null +++ b/apps/gpu/LBM/BoundaryLayer/BoundaryLayer.cpp @@ -0,0 +1,270 @@ + +#define _USE_MATH_DEFINES +#include <math.h> +#include <string> +#include <sstream> +#include <iostream> +#include <stdexcept> +#include <fstream> +#include <exception> +#include <memory> + +////////////////////////////////////////////////////////////////////////// + +#include "Core/DataTypes.h" +#include "PointerDefinitions.h" + +#include "Core/StringUtilities/StringUtil.h" + +#include "Core/VectorTypes.h" + +#include <basics/config/ConfigurationFile.h> + +#include <logger/Logger.h> + + +////////////////////////////////////////////////////////////////////////// + +#include "GridGenerator/grid/GridBuilder/LevelGridBuilder.h" +#include "GridGenerator/grid/GridBuilder/MultipleGridBuilder.h" +#include "GridGenerator/grid/BoundaryConditions/Side.h" +#include "GridGenerator/grid/GridFactory.h" + +#include "GridGenerator/io/SimulationFileWriter/SimulationFileWriter.h" +#include "GridGenerator/io/GridVTKWriter/GridVTKWriter.h" +#include "GridGenerator/io/STLReaderWriter/STLReader.h" +#include "GridGenerator/io/STLReaderWriter/STLWriter.h" + +////////////////////////////////////////////////////////////////////////// + +#include "VirtualFluids_GPU/LBM/Simulation.h" +#include "VirtualFluids_GPU/Communication/Communicator.h" +#include "VirtualFluids_GPU/DataStructureInitializer/GridReaderGenerator/GridGenerator.h" +#include "VirtualFluids_GPU/DataStructureInitializer/GridProvider.h" +#include "VirtualFluids_GPU/DataStructureInitializer/GridReaderFiles/GridReader.h" +#include "VirtualFluids_GPU/Parameter/Parameter.h" +#include "VirtualFluids_GPU/Output/FileWriter.h" +#include 
"VirtualFluids_GPU/PreCollisionInteractor/ActuatorLine.h" +#include "VirtualFluids_GPU/PreCollisionInteractor/Probes/PointProbe.h" +#include "VirtualFluids_GPU/PreCollisionInteractor/Probes/PlaneProbe.h" +#include "VirtualFluids_GPU/PreCollisionInteractor/Probes/PlanarAverageProbe.h" +#include "VirtualFluids_GPU/PreCollisionInteractor/Probes/WallModelProbe.h" + +#include "VirtualFluids_GPU/Kernel/Utilities/KernelFactory/KernelFactoryImp.h" +#include "VirtualFluids_GPU/PreProcessor/PreProcessorFactory/PreProcessorFactoryImp.h" + +#include "VirtualFluids_GPU/GPU/CudaMemoryManager.h" + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +std::string path("."); + +std::string simulationName("BoundayLayer"); + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + + +void multipleLevel(const std::string& configPath) +{ + + logging::Logger::addStream(&std::cout); + logging::Logger::setDebugLevel(logging::Logger::Level::INFO_LOW); + logging::Logger::timeStamp(logging::Logger::ENABLE); + logging::Logger::enablePrintedRankNumbers(logging::Logger::ENABLE); + + auto gridFactory = GridFactory::make(); + auto gridBuilder = MultipleGridBuilder::makeShared(gridFactory); + + 
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + + vf::gpu::Communicator& communicator = vf::gpu::Communicator::getInstance(); + + vf::basics::ConfigurationFile config; + config.load(configPath); + ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////^ + SPtr<Parameter> para = std::make_shared<Parameter>(config, communicator.getNummberOfProcess(), communicator.getPID()); + + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + // + // U s e r s e t t i n g s + // + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + + LbmOrGks lbmOrGks = LBM; + + const real H = 1000.0; // boundary layer height in m + + const real L_x = 6*H; + const real L_y = 4*H; + const real L_z = 1*H; + + const real z0 = 0.1; // roughness length in m + const real u_star = 0.4; //friction velocity in m/s + const real kappa = 0.4; // von Karman constant + + const real viscosity = 1.56e-5; + + const real velocity = 0.5*u_star/kappa*log(L_z/z0); //0.5 times max mean velocity at the top in m/s + + const real mach = 
config.contains("Ma")? config.getValue<real>("Ma"): 0.1; + + const uint nodes_per_H = config.contains("nz")? config.getValue<uint>("nz"): 64; + + // all in s + const float tStartOut = config.getValue<real>("tStartOut"); + const float tOut = config.getValue<real>("tOut"); + const float tEnd = config.getValue<real>("tEnd"); // total time of simulation + + const float tStartAveraging = config.getValue<real>("tStartAveraging"); + const float tStartTmpAveraging = config.getValue<real>("tStartTmpAveraging"); + const float tAveraging = config.getValue<real>("tAveraging"); + const float tStartOutProbe = config.getValue<real>("tStartOutProbe"); + const float tOutProbe = config.getValue<real>("tOutProbe"); + + + const real dx = L_z/real(nodes_per_H); + + const real dt = dx * mach / (sqrt(3) * velocity); + + const real velocityLB = velocity * dt / dx; // LB units + + const real viscosityLB = viscosity * dt / (dx * dx); // LB units + + const real pressureGradient = u_star * u_star / H ; + const real pressureGradientLB = pressureGradient * (dt*dt)/dx; // LB units + + VF_LOG_INFO("velocity [dx/dt] = {}", velocityLB); + VF_LOG_INFO("dt = {}", dt); + VF_LOG_INFO("dx = {}", dx); + VF_LOG_INFO("viscosity [10^8 dx^2/dt] = {}", viscosityLB*1e8); + VF_LOG_INFO("u* /(dx/dt) = {}", u_star*dt/dx); + VF_LOG_INFO("dpdx = {}", pressureGradient); + VF_LOG_INFO("dpdx /(dx/dt^2) = {}", pressureGradientLB); + + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + + para->setOutputPrefix( simulationName ); + + para->setFName(para->getOutputPath() + "/" + para->getOutputPrefix()); + + para->setPrintFiles(true); + + para->setForcing(pressureGradientLB, 0, 0); + para->setVelocity(velocityLB); + para->setViscosity(viscosityLB); + para->setVelocityRatio( dx / dt ); + para->setViscosityRatio( dx*dx/dt ); + para->setDensityRatio( 1.0 ); + + if(para->getUseAMD()) + 
para->setMainKernel("TurbulentViscosityCumulantK17CompChim"); + else + para->setMainKernel("CumulantK17CompChim"); + + para->setIsBodyForce( config.getValue<bool>("bodyForce") ); + + para->setTStartOut(uint(tStartOut/dt) ); + para->setTOut( uint(tOut/dt) ); + para->setTEnd( uint(tEnd/dt) ); + + // para->setTOut( 100 ); + // para->setTEnd( 100000 ); + + ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + + gridBuilder->addCoarseGrid(0.0, 0.0, 0.0, + L_x, L_y, L_z, dx); + // gridBuilder->setNumberOfLayers(0,0); + // gridBuilder->addGrid( new Cuboid( 300., 300., 300., 1000. , 1000., 600.), 1 ); + + gridBuilder->setPeriodicBoundaryCondition(true, true, false); + + gridBuilder->buildGrids(lbmOrGks, false); // buildGrids() has to be called before setting the BCs!!!! + + uint samplingOffset = 2; + // gridBuilder->setVelocityBoundaryCondition(SideType::MZ, 0.0, 0.0, 0.0); + gridBuilder->setStressBoundaryCondition(SideType::MZ, + 0.0, 0.0, 1.0, // wall normals + samplingOffset, z0/dx); // wall model settinng + para->setHasWallModelMonitor(true); + + + // gridBuilder->setVelocityBoundaryCondition(SideType::PZ, 0.0, 0.0, 0.0); + gridBuilder->setSlipBoundaryCondition(SideType::PZ, 0.0, 0.0, 0.0); + + real cPi = 3.1415926535897932384626433832795; + para->setInitialCondition([&](real coordX, real coordY, real coordZ, real &rho, real &vx, real &vy, real &vz) { + rho = (real)0.0; + vx = (u_star/0.4 * log(coordZ/z0) + 2.0*sin(cPi*16.0f*coordX/L_x)*sin(cPi*8.0f*coordZ/H)/(pow(coordZ/H,c2o1)+c1o1)) * dt / dx; + vy = 2.0*sin(cPi*16.0f*coordX/L_x)*sin(cPi*8.0f*coordZ/H)/(pow(coordZ/H,c2o1)+c1o1) * dt / dx; + vz = 8.0*u_star/0.4*(sin(cPi*8.0*coordY/H)*sin(cPi*8.0*coordZ/H)+sin(cPi*8.0*coordX/L_x))/(pow(L_z/2.0-coordZ, c2o1)+c1o1) * dt / dx; + }); + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + 
+ SPtr<CudaMemoryManager> cudaMemoryManager = CudaMemoryManager::make(para); + + SPtr<GridProvider> gridGenerator = GridProvider::makeGridGenerator(gridBuilder, para, cudaMemoryManager); + + SPtr<PlanarAverageProbe> planarAverageProbe = SPtr<PlanarAverageProbe>( new PlanarAverageProbe("planeProbe", para->getOutputPath(), tStartAveraging/dt, tStartTmpAveraging/dt, tAveraging/dt , tStartOutProbe/dt, tOutProbe/dt, 'z') ); + planarAverageProbe->addAllAvailableStatistics(); + planarAverageProbe->setFileNameToNOut(); + para->addProbe( planarAverageProbe ); + + para->setHasWallModelMonitor(true); + SPtr<WallModelProbe> wallModelProbe = SPtr<WallModelProbe>( new WallModelProbe("wallModelProbe", para->getOutputPath(), tStartAveraging/dt, tStartTmpAveraging/dt, tAveraging/dt/4.0 , tStartOutProbe/dt, tOutProbe/dt) ); + wallModelProbe->addAllAvailableStatistics(); + wallModelProbe->setFileNameToNOut(); + wallModelProbe->setForceOutputToStress(true); + if(para->getIsBodyForce()) + wallModelProbe->setEvaluatePressureGradient(true); + para->addProbe( wallModelProbe ); + + Simulation sim(communicator); + SPtr<FileWriter> fileWriter = SPtr<FileWriter>(new FileWriter()); + SPtr<KernelFactoryImp> kernelFactory = KernelFactoryImp::getInstance(); + SPtr<PreProcessorFactoryImp> preProcessorFactory = PreProcessorFactoryImp::getInstance(); + sim.setFactories(kernelFactory, preProcessorFactory); + sim.init(para, gridGenerator, fileWriter, cudaMemoryManager); + sim.run(); + sim.free(); +} + +int main( int argc, char* argv[]) +{ + if ( argv != NULL ) + { + try + { + vf::logging::Logger::initalizeLogger(); + + if( argc > 1){ path = argv[1]; } + + multipleLevel(path + "/configBoundaryLayer.txt"); + } + catch (const spdlog::spdlog_ex &ex) { + std::cout << "Log initialization failed: " << ex.what() << std::endl; + } + + catch (const std::bad_alloc& e) + { + VF_LOG_CRITICAL("Bad Alloc: {}", e.what()); + } + catch (const std::exception& e) + { + VF_LOG_CRITICAL("exception: {}", e.what()); + } + 
catch (...) + { + VF_LOG_CRITICAL("Unknown exception!"); + } + } + return 0; +} diff --git a/apps/gpu/LBM/BoundaryLayer/CMakeLists.txt b/apps/gpu/LBM/BoundaryLayer/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..801b634803943d48abda690935df0867eb3418d2 --- /dev/null +++ b/apps/gpu/LBM/BoundaryLayer/CMakeLists.txt @@ -0,0 +1,7 @@ +PROJECT(BoundaryLayer LANGUAGES CUDA CXX) + +vf_add_library(BUILDTYPE binary PRIVATE_LINK basics VirtualFluids_GPU GridGenerator MPI::MPI_CXX FILES BoundaryLayer.cpp) + +set_source_files_properties(BoundaryLayer.cpp PROPERTIES LANGUAGE CUDA) + +set_target_properties(BoundaryLayer PROPERTIES CUDA_SEPARABLE_COMPILATION ON) diff --git a/apps/gpu/LBM/BoundaryLayer/configBoundaryLayer.txt b/apps/gpu/LBM/BoundaryLayer/configBoundaryLayer.txt new file mode 100644 index 0000000000000000000000000000000000000000..a489f0ab89738a193b16fee41c212a5943f6525d --- /dev/null +++ b/apps/gpu/LBM/BoundaryLayer/configBoundaryLayer.txt @@ -0,0 +1,30 @@ +################################################## +#informations for Writing +################################################## +Path = . +################################################## +#informations for reading +################################################## +GridPath = . 
+################################################## +Devices = 1 +################################################## +tStartOut = 0 +tOut = 100000 +tEnd = 300000 +################################################## +tStartAveraging = 0 +tStartTmpAveraging = 100000 +tAveraging = 200 +tStartOutProbe = 0 +tOutProbe = 1000 +################################################## +Ma = 0.1 +nz = 96 + +bodyForce = true +UseAMD = true +SGSconstant = 0.2 +QuadricLimiterP = 100000.0 +QuadricLimiterM = 100000.0 +QuadricLimiterD = 100000.0 diff --git a/gpu.cmake b/gpu.cmake index 44c3ce9ab3eb8d99ed8ede0ddc58bfe4112b78dd..4a1b1a9eb070dcb85ff0c4147fa3b272372a2da9 100644 --- a/gpu.cmake +++ b/gpu.cmake @@ -37,6 +37,7 @@ IF (BUILD_VF_GPU) #add_subdirectory(apps/gpu/LBM/TGV_3D) #add_subdirectory(apps/gpu/LBM/TGV_3D_MultiGPU) #add_subdirectory(apps/gpu/LBM/ActuatorLine) + add_subdirectory(apps/gpu/LBM/BoundaryLayer) ELSE() MESSAGE( STATUS "exclude Virtual Fluids GPU." ) ENDIF() @@ -130,4 +131,4 @@ endif() if(BUILD_VF_TRAFFIC) add_subdirectory(src/gpu/Traffic) add_subdirectory(apps/gpu/LBM/TrafficTest) -endif() \ No newline at end of file +endif() diff --git a/src/gpu/GridGenerator/grid/BoundaryConditions/BoundaryCondition.cpp b/src/gpu/GridGenerator/grid/BoundaryConditions/BoundaryCondition.cpp index 8930bdf3b165b4e0dbb497773fd0b6cf6ec6f8f7..5102f60fc295aadf4323a4b332bf3dd8f7f21dbf 100644 --- a/src/gpu/GridGenerator/grid/BoundaryConditions/BoundaryCondition.cpp +++ b/src/gpu/GridGenerator/grid/BoundaryConditions/BoundaryCondition.cpp @@ -42,6 +42,8 @@ bool gg::BoundaryCondition::isSide( SideType side ) const return this->side->whoAmI() == side; } +////////////////////////////////////////////////////////////////////////// + void VelocityBoundaryCondition::setVelocityProfile( SPtr<Grid> grid, std::function<void(real, real, real, real &, real &, real &)> velocityProfile) { @@ -55,6 +57,8 @@ void VelocityBoundaryCondition::setVelocityProfile( } } 
+////////////////////////////////////////////////////////////////////////// + void GeometryBoundaryCondition::setTangentialVelocityForPatch(SPtr<Grid> grid, uint patch, real p1x, real p1y, real p1z, real p2x, real p2y, real p2z, @@ -102,3 +106,23 @@ void GeometryBoundaryCondition::setTangentialVelocityForPatch(SPtr<Grid> grid, u } } } + +////////////////////////////////////////////////////////////////////////// + +void StressBoundaryCondition::fillSamplingIndices(std::vector<SPtr<Grid> > grid, uint level, uint samplingOffset) +{ + + for( uint i = 0; i < this->indices.size(); i++ ) + { + real x, y, z; + grid[level]->transIndexToCoords(this->indices[i], x, y, z); + + real x_sampling = x + this->getNormalx(i)*samplingOffset*grid[level]->getDelta(); + real y_sampling = y + this->getNormaly(i)*samplingOffset*grid[level]->getDelta(); + real z_sampling = z + this->getNormalz(i)*samplingOffset*grid[level]->getDelta(); + + this->velocitySamplingIndices.push_back( grid[level]->transCoordToIndex(x_sampling, y_sampling, z_sampling) ); + } + +} + diff --git a/src/gpu/GridGenerator/grid/BoundaryConditions/BoundaryCondition.h b/src/gpu/GridGenerator/grid/BoundaryConditions/BoundaryCondition.h index 9ae5f09e208e92213ca90ff75f095eddd5dbeaf1..8ea4c7ea6e37be1fd5ef8dbd1685f55b1ad549e0 100644 --- a/src/gpu/GridGenerator/grid/BoundaryConditions/BoundaryCondition.h +++ b/src/gpu/GridGenerator/grid/BoundaryConditions/BoundaryCondition.h @@ -118,6 +118,52 @@ public: } void fillSlipNormalLists() + { + for (uint index : this->indices) { + (void)index; + this->normalXList.push_back(normalX); + this->normalYList.push_back(normalY); + this->normalZList.push_back(normalZ); + } + } + + real getNormalx() { return this->normalX; } + real getNormaly() { return this->normalY; } + real getNormalz() { return this->normalZ; } + + real getNormalx(uint index) { return this->normalXList[index]; } + real getNormaly(uint index) { return this->normalYList[index]; } + real getNormalz(uint index) { return 
this->normalZList[index]; } +}; + +////////////////////////////////////////////////////////////////////////// + +class StressBoundaryCondition : public gg::BoundaryCondition +{ +public: + static SPtr<StressBoundaryCondition> make(real normalX, real normalY, real normalZ, uint samplingOffset, real z0) + { + return SPtr<StressBoundaryCondition>(new StressBoundaryCondition(normalX, normalY, normalZ, samplingOffset, z0)); + } + + real normalX, normalY, normalZ; + uint samplingOffset; + real z0; + std::vector<real> normalXList, normalYList, normalZList; + std::vector<uint> samplingOffsetList; + std::vector<real> z0List; + std::vector<uint> velocitySamplingIndices; + +protected: + StressBoundaryCondition(real normalX, real normalY, real normalZ, uint samplingOffset, real z0) : normalX(normalX), normalY(normalY), normalZ(normalZ), samplingOffset(samplingOffset), z0(z0){ } + +public: + virtual char getType() const override + { + return vf::gpu::BC_STRESS; + } + + void fillStressNormalLists() { for (uint index : this->indices) { (void)index; @@ -127,6 +173,22 @@ public: } } + void fillZ0Lists() + { + for (uint index : this->indices) { + (void)index; + this->z0List.push_back(z0); + } + } + + void fillSamplingOffsetLists() + { + for (uint index : this->indices) { + (void)index; + this->samplingOffsetList.push_back(samplingOffset); + } + } + real getNormalx() { return this->normalX; } real getNormaly() { return this->normalY; } real getNormalz() { return this->normalZ; } @@ -134,6 +196,15 @@ public: real getNormalx(uint index) { return this->normalXList[index]; } real getNormaly(uint index) { return this->normalYList[index]; } real getNormalz(uint index) { return this->normalZList[index]; } + + uint getSamplingOffset() { return this->samplingOffset; } + uint getSamplingOffset(uint index) { return this->samplingOffsetList[index]; } + + real getZ0() { return this->z0; } + real getZ0(uint index) { return this->z0List[index]; } + + void fillSamplingIndices(std::vector<SPtr<Grid> > 
grid, uint level, uint samplingOffset); + }; ////////////////////////////////////////////////////////////////////////// diff --git a/src/gpu/GridGenerator/grid/BoundaryConditions/Side.cpp b/src/gpu/GridGenerator/grid/BoundaryConditions/Side.cpp index f76844c134cdc4117d010f8f7f667640d38cc2e2..6c7bf8ca1853826d83fb6a713ffe03716bd2cf9a 100644 --- a/src/gpu/GridGenerator/grid/BoundaryConditions/Side.cpp +++ b/src/gpu/GridGenerator/grid/BoundaryConditions/Side.cpp @@ -53,16 +53,18 @@ void Side::addIndices(SPtr<Grid> grid, SPtr<BoundaryCondition> boundaryCondition || grid->getFieldEntry(index) == vf::gpu::FLUID_CFC || grid->getFieldEntry(index) == vf::gpu::FLUID_CFF || grid->getFieldEntry(index) == vf::gpu::FLUID_FCC - || grid->getFieldEntry(index) == vf::gpu::FLUID_FCF ) ) + || grid->getFieldEntry(index) == vf::gpu::FLUID_FCF )) { grid->setFieldEntry(index, boundaryCondition->getType()); boundaryCondition->indices.push_back(index); setPressureNeighborIndices(boundaryCondition, grid, index); + setStressSamplingIndices(boundaryCondition, grid, index); setQs(grid, boundaryCondition, index); boundaryCondition->patches.push_back(0); } + } } } @@ -91,6 +93,30 @@ void Side::setPressureNeighborIndices(SPtr<BoundaryCondition> boundaryCondition, } } +void Side::setStressSamplingIndices(SPtr<BoundaryCondition> boundaryCondition, SPtr<Grid> grid, const uint index) +{ + auto stressBoundaryCondition = std::dynamic_pointer_cast<StressBoundaryCondition>(boundaryCondition); + if (stressBoundaryCondition) + { + real x, y, z; + grid->transIndexToCoords(index, x, y, z); + + real nx = x; + real ny = y; + real nz = z; + + if (boundaryCondition->side->getCoordinate() == X_INDEX) + nx = -boundaryCondition->side->getDirection() * stressBoundaryCondition->samplingOffset * grid->getDelta() + x; + if (boundaryCondition->side->getCoordinate() == Y_INDEX) + ny = -boundaryCondition->side->getDirection() * stressBoundaryCondition->samplingOffset * grid->getDelta() + y; + if 
(boundaryCondition->side->getCoordinate() == Z_INDEX) + nz = -boundaryCondition->side->getDirection() * stressBoundaryCondition->samplingOffset * grid->getDelta() + z; + + uint samplingIndex = grid->transCoordToIndex(nx, ny, nz); + stressBoundaryCondition->velocitySamplingIndices.push_back(samplingIndex); + } +} + void Side::setQs(SPtr<Grid> grid, SPtr<BoundaryCondition> boundaryCondition, uint index) { @@ -133,6 +159,7 @@ void Side::setQs(SPtr<Grid> grid, SPtr<BoundaryCondition> boundaryCondition, uin qNode[dir] = 0.5; else qNode[dir] = -1.0; + } boundaryCondition->qs.push_back(qNode); @@ -280,6 +307,6 @@ void PZ::addIndices(std::vector<SPtr<Grid> > grid, uint level, SPtr<BoundaryCond real coordinateNormal = grid[level]->getEndZ() - grid[level]->getDelta(); if( coordinateNormal < grid[0]->getEndZ() - grid[0]->getDelta() ) return; - + Side::addIndices(grid[level], boundaryCondition, "z", coordinateNormal, startInner, endInner, startOuter, endOuter); } diff --git a/src/gpu/GridGenerator/grid/BoundaryConditions/Side.h b/src/gpu/GridGenerator/grid/BoundaryConditions/Side.h index d4c9e3a4bcab73d368c863ee57d66f692126fa06..c9ffd40b0aa8fc2b8da8b4d85de60faea6927117 100644 --- a/src/gpu/GridGenerator/grid/BoundaryConditions/Side.h +++ b/src/gpu/GridGenerator/grid/BoundaryConditions/Side.h @@ -78,6 +78,8 @@ protected: static void setPressureNeighborIndices(SPtr<gg::BoundaryCondition> boundaryCondition, SPtr<Grid> grid, const uint index); + static void setStressSamplingIndices(SPtr<gg::BoundaryCondition> boundaryCondition, SPtr<Grid> grid, const uint index); + static void setQs(SPtr<Grid> grid, SPtr<gg::BoundaryCondition> boundaryCondition, uint index); private: diff --git a/src/gpu/GridGenerator/grid/Field.cpp b/src/gpu/GridGenerator/grid/Field.cpp index d8ac2a80ea6fc5da879c5378aac2eab70016ff72..86985af60e1ca25c247b586dbc2f356c665a8875 100644 --- a/src/gpu/GridGenerator/grid/Field.cpp +++ b/src/gpu/GridGenerator/grid/Field.cpp @@ -130,7 +130,7 @@ bool Field::isQ(uint index) 
const bool Field::isBoundaryConditionNode(uint index) const { - return field[index] == BC_SOLID || field[index] == BC_OUTFLOW || field[index] == BC_VELOCITY || field[index] == BC_PRESSURE || field[index] == BC_SLIP; + return field[index] == BC_SOLID || field[index] == BC_OUTFLOW || field[index] == BC_VELOCITY || field[index] == BC_PRESSURE || field[index] == BC_SLIP || field[index] == BC_STRESS; } // --------------------------------------------------------- // diff --git a/src/gpu/GridGenerator/grid/GridBuilder/GridBuilder.h b/src/gpu/GridGenerator/grid/GridBuilder/GridBuilder.h index 6ab8efc88d02e1032c2d26c756e84d4fa33359ac..a5ee3943f23ed4e9ffa1acb92ffc525e9de7780c 100644 --- a/src/gpu/GridGenerator/grid/GridBuilder/GridBuilder.h +++ b/src/gpu/GridGenerator/grid/GridBuilder/GridBuilder.h @@ -98,6 +98,13 @@ public: virtual void getSlipValues(real *normalX, real *normalY, real *normalZ, int *indices, int level) const = 0; virtual void getSlipQs(real* qs[27], int level) const = 0; + virtual uint getStressSize(int level) const = 0; + virtual void getStressValues(real *normalX, real *normalY, real *normalZ, + real* vx1, real* vy1, real* vz1, + real* vx, real* vy, real* vz, + int *indices, int* samplingIndices, int* samplingOffsets, real* z0, int level) const = 0; + virtual void getStressQs(real* qs[27], int level) const = 0; + virtual uint getVelocitySize(int level) const = 0; virtual void getVelocityValues(real* vx, real* vy, real* vz, int* indices, int level) const = 0; virtual void getVelocityQs(real* qs[27], int level) const = 0; diff --git a/src/gpu/GridGenerator/grid/GridBuilder/LevelGridBuilder.cpp b/src/gpu/GridGenerator/grid/GridBuilder/LevelGridBuilder.cpp index 0b67fe275492cebe8bb519052c51ed0157167194..30156a7c65ffff00fec92ec1d8a7644236756488 100644 --- a/src/gpu/GridGenerator/grid/GridBuilder/LevelGridBuilder.cpp +++ b/src/gpu/GridGenerator/grid/GridBuilder/LevelGridBuilder.cpp @@ -28,7 +28,7 @@ // //! \file LevelGridBuilder.cpp //! \ingroup grid -//! 
\author Soeren Peters, Stephan Lenz, Martin Schönherr +//! \author Soeren Peters, Stephan Lenz, Martin Sch�nherr //======================================================================================= #include "LevelGridBuilder.h" @@ -84,12 +84,32 @@ void LevelGridBuilder::setSlipBoundaryCondition(SideType sideType, real nomalX, slipBoundaryCondition->side->addIndices(grids, 0, slipBoundaryCondition); slipBoundaryCondition->fillSlipNormalLists(); - boundaryConditions[0]->slipBoundaryConditions.push_back(slipBoundaryCondition); *logging::out << logging::Logger::INFO_INTERMEDIATE << "Set Slip BC on level " << 0 << " with " << (int)slipBoundaryCondition->indices.size() << "\n"; } +void LevelGridBuilder::setStressBoundaryCondition( SideType sideType, + real nomalX, real normalY, real normalZ, + uint samplingOffset, real z0) +{ + SPtr<StressBoundaryCondition> stressBoundaryCondition = StressBoundaryCondition::make(nomalX, normalY, normalZ, samplingOffset, z0); + + auto side = SideFactory::make(sideType); + + stressBoundaryCondition->side = side; + stressBoundaryCondition->side->addIndices(grids, 0, stressBoundaryCondition); + + stressBoundaryCondition->fillStressNormalLists(); + stressBoundaryCondition->fillSamplingOffsetLists(); + stressBoundaryCondition->fillZ0Lists(); + // stressBoundaryCondition->fillSamplingIndices(grids, 0, samplingOffset); //redundant with Side::setStressSamplingIndices but potentially a better approach for cases with complex geometries + + boundaryConditions[0]->stressBoundaryConditions.push_back(stressBoundaryCondition); + + *logging::out << logging::Logger::INFO_INTERMEDIATE << "Set Stress BC on level " << 0 << " with " << (int)stressBoundaryCondition->indices.size() << "\n"; +} + void LevelGridBuilder::setVelocityBoundaryCondition(SideType sideType, real vx, real vy, real vz) { if (sideType == SideType::GEOMETRY) @@ -167,7 +187,9 @@ void LevelGridBuilder::setNoSlipBoundaryCondition(SideType sideType) noSlipBoundaryCondition->side = side; 
noSlipBoundaryCondition->side->addIndices(grids, level, noSlipBoundaryCondition); - boundaryConditions[level]->noSlipBoundaryConditions.push_back(noSlipBoundaryCondition); + noSlipBoundaryCondition->fillVelocityLists(); + + boundaryConditions[level]->velocityBoundaryConditions.push_back(noSlipBoundaryCondition); //now effectively just a wrapper for velocityBC with zero velocity. No distinction in Gridgenerator. } } @@ -341,7 +363,7 @@ void LevelGridBuilder::getSlipValues(real* normalX, real* normalY, real* normalZ for (uint index = 0; index < boundaryCondition->indices.size(); index++) { indices[allIndicesCounter] = grids[level]->getSparseIndex(boundaryCondition->indices[index]) + 1; - + normalX[allIndicesCounter] = boundaryCondition->getNormalx(index); normalY[allIndicesCounter] = boundaryCondition->getNormaly(index); normalZ[allIndicesCounter] = boundaryCondition->getNormalz(index); @@ -366,6 +388,57 @@ void LevelGridBuilder::getSlipQs(real* qs[27], int level) const } } +uint LevelGridBuilder::getStressSize(int level) const +{ + uint size = 0; + for (auto boundaryCondition : boundaryConditions[level]->stressBoundaryConditions) + { + size += uint(boundaryCondition->indices.size()); + } + return size; +} + +void LevelGridBuilder::getStressValues( real* normalX, real* normalY, real* normalZ, + real* vx, real* vy, real* vz, + real* vx1, real* vy1, real* vz1, + int* indices, int* samplingIndices, int* samplingOffset, real* z0, int level) const +{ + + int allIndicesCounter = 0; + for (auto boundaryCondition : boundaryConditions[level]->stressBoundaryConditions) + { + for (uint index = 0; index < boundaryCondition->indices.size(); index++) + { + indices[allIndicesCounter] = grids[level]->getSparseIndex(boundaryCondition->indices[index]) + 1; + samplingIndices[allIndicesCounter] = grids[level]->getSparseIndex(boundaryCondition->velocitySamplingIndices[index]) + 1; + + normalX[allIndicesCounter] = boundaryCondition->getNormalx(index); + normalY[allIndicesCounter] = 
boundaryCondition->getNormaly(index); + normalZ[allIndicesCounter] = boundaryCondition->getNormalz(index); + + samplingOffset[allIndicesCounter] = boundaryCondition->getSamplingOffset(index); + z0[allIndicesCounter] = boundaryCondition->getZ0(index); + allIndicesCounter++; + } + } +} + +void LevelGridBuilder::getStressQs(real* qs[27], int level) const +{ + int allIndicesCounter = 0; + for (auto boundaryCondition : boundaryConditions[level]->stressBoundaryConditions) + { + for (uint index = 0; index < boundaryCondition->indices.size(); index++) + { + for (int dir = 0; dir <= grids[level]->getEndDirection(); dir++) + { + qs[dir][allIndicesCounter] = boundaryCondition->qs[index][dir]; + } + allIndicesCounter++; + } + } +} + uint LevelGridBuilder::getVelocitySize(int level) const { uint size = 0; diff --git a/src/gpu/GridGenerator/grid/GridBuilder/LevelGridBuilder.h b/src/gpu/GridGenerator/grid/GridBuilder/LevelGridBuilder.h index f2325435d99140f33eee9844c13908de87788558..f3d21cf130aaaf5caac78c8828f35951ebd4e510 100644 --- a/src/gpu/GridGenerator/grid/GridBuilder/LevelGridBuilder.h +++ b/src/gpu/GridGenerator/grid/GridBuilder/LevelGridBuilder.h @@ -28,7 +28,7 @@ // //! \file LevelGridBuilder.h //! \ingroup grid -//! \author Soeren Peters, Stephan Lenz, Martin Schönherr +//! 
\author Soeren Peters, Stephan Lenz, Martin Sch�nherr //======================================================================================= #ifndef LEVEL_GRID_BUILDER_H #define LEVEL_GRID_BUILDER_H @@ -54,6 +54,7 @@ class BoundingBox; class Side; class VelocityBoundaryCondition; class SlipBoundaryCondition; +class StressBoundaryCondition; class PressureBoundaryCondition; class GeometryBoundaryCondition; enum class SideType; @@ -73,6 +74,7 @@ public: GRIDGENERATOR_EXPORT virtual ~LevelGridBuilder(); GRIDGENERATOR_EXPORT void setSlipBoundaryCondition(SideType sideType, real nomalX, real normalY, real normalZ); + GRIDGENERATOR_EXPORT void setStressBoundaryCondition(SideType sideType, real nomalX, real normalY, real normalZ, uint samplingOffset, real z0); GRIDGENERATOR_EXPORT void setVelocityBoundaryCondition(SideType sideType, real vx, real vy, real vz); GRIDGENERATOR_EXPORT void setPressureBoundaryCondition(SideType sideType, real rho); GRIDGENERATOR_EXPORT void setPeriodicBoundaryCondition(bool periodic_X, bool periodic_Y, bool periodic_Z); @@ -99,6 +101,13 @@ public: GRIDGENERATOR_EXPORT virtual void getSlipValues(real* normalX, real* normalY, real* normalZ, int* indices, int level) const override; GRIDGENERATOR_EXPORT virtual void getSlipQs(real* qs[27], int level) const override; + GRIDGENERATOR_EXPORT uint getStressSize(int level) const override; + GRIDGENERATOR_EXPORT virtual void getStressValues( real* normalX, real* normalY, real* normalZ, + real* vx, real* vy, real* vz, + real* vx1, real* vy1, real* vz1, + int* indices, int* samplingIndices, int* samplingOffsets, real* z0, int level) const override; + GRIDGENERATOR_EXPORT virtual void getStressQs(real* qs[27], int level) const override; + GRIDGENERATOR_EXPORT uint getVelocitySize(int level) const override; GRIDGENERATOR_EXPORT virtual void getVelocityValues(real* vx, real* vy, real* vz, int* indices, int level) const override; GRIDGENERATOR_EXPORT virtual void getVelocityQs(real* qs[27], int level) const 
override; @@ -127,11 +136,13 @@ protected: std::vector<SPtr<SlipBoundaryCondition>> slipBoundaryConditions; + std::vector<SPtr<StressBoundaryCondition>> stressBoundaryConditions; + std::vector<SPtr<VelocityBoundaryCondition>> velocityBoundaryConditions; std::vector<SPtr<PressureBoundaryCondition>> pressureBoundaryConditions; - std::vector<SPtr<VelocityBoundaryCondition> > noSlipBoundaryConditions; + std::vector<SPtr<VelocityBoundaryCondition>> noSlipBoundaryConditions; SPtr<GeometryBoundaryCondition> geometryBoundaryCondition; }; diff --git a/src/gpu/GridGenerator/grid/GridImp.cpp b/src/gpu/GridGenerator/grid/GridImp.cpp index 56a0cc6870a59de9116c3ac3837db7e08f4308b5..7eda4f9b8e5a374347b8572f3a28a947be5ad9cb 100644 --- a/src/gpu/GridGenerator/grid/GridImp.cpp +++ b/src/gpu/GridGenerator/grid/GridImp.cpp @@ -186,7 +186,7 @@ void GridImp::inital(const SPtr<Grid> fineGrid, uint numberOfLayers) #pragma omp parallel for for (int index = 0; index < (int)this->size; index++) this->findEndOfGridStopperNode(index); - + *logging::out << logging::Logger::INFO_INTERMEDIATE << "Grid created: " << "from (" << this->startX << ", " << this->startY << ", " << this->startZ << ") to (" << this->endX << ", " << this->endY << ", " << this->endZ << ")\n" << "nodes: " << this->nx << " x " << this->ny << " x " << this->nz << " = " << this->size << "\n"; @@ -440,7 +440,7 @@ void GridImp::findEndOfGridStopperNode(uint index) else this->field.setFieldEntryToStopperOutOfGridBoundary(index); } - + if (isValidEndOfGridBoundaryStopper(index)) this->field.setFieldEntryToStopperOutOfGridBoundary(index); } @@ -1459,7 +1459,6 @@ void GridImp::calculateQs(const uint index, const Vertex &point, Object* object) this->qPatches[ this->qIndices[index] ] = 0; - //printf("%d %f \n", this->qIndices[index], subdistance); } } } diff --git a/src/gpu/GridGenerator/grid/GridImp.h b/src/gpu/GridGenerator/grid/GridImp.h index 77a3cb0014c6bb9c4d69fe4b62e2fb9646539c89..b096f5ff85dcd725ff065dbb6fc31d75c016c869 100644 
--- a/src/gpu/GridGenerator/grid/GridImp.h +++ b/src/gpu/GridGenerator/grid/GridImp.h @@ -28,7 +28,7 @@ // //! \file GridImp.h //! \ingroup grid -//! \author Soeren Peters, Stephan Lenz, Martin Schönherr +//! \author Soeren Peters, Stephan Lenz, Martin Sch�nherr //======================================================================================= #ifndef GRID_IMP_H #define GRID_IMP_H @@ -197,6 +197,7 @@ public: void fixRefinementIntoWall(uint xIndex, uint yIndex, uint zIndex, int dir); void findStopperNode(uint index); void findEndOfGridStopperNode(uint index); + void findEndOfGridStopperPeriodicNode(uint index); void findSolidStopperNode(uint index); void findBoundarySolidNode(uint index); @@ -209,7 +210,7 @@ public: bool isNode(uint index, char type) const; bool nodeInNextCellIs(int index, char type) const; bool hasAllNeighbors(uint index) const; - bool hasNeighborOfType(uint index, char type)const; + bool hasNeighborOfType(uint index, char type) const; bool cellContainsOnly(Cell &cell, char type) const; bool cellContainsOnly(Cell &cell, char typeA, char typeB) const; diff --git a/src/gpu/GridGenerator/grid/NodeValues.h b/src/gpu/GridGenerator/grid/NodeValues.h index c726fdf85c8199633e118d8f8a5365ee658d4e6a..b8312b0673337d11b4bdf0b8052e89d92ce127ef 100644 --- a/src/gpu/GridGenerator/grid/NodeValues.h +++ b/src/gpu/GridGenerator/grid/NodeValues.h @@ -56,6 +56,7 @@ static constexpr char BC_SOLID = 22; static constexpr char BC_SLIP = 23; static constexpr char BC_NOSLIP = 24; static constexpr char BC_OUTFLOW = 25; +static constexpr char BC_STRESS = 26; static constexpr char STOPPER_OUT_OF_GRID = 30; static constexpr char STOPPER_COARSE_UNDER_FINE = 31; diff --git a/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.cpp b/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.cpp index 1fec35ed1cf86b09c04bf861a3386cab3b35410d..17d01e57e4c34894e0e0551dd7443dfe92582240 100644 --- a/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.cpp +++ 
b/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.cpp @@ -18,7 +18,7 @@ void updateGrid27(Parameter* para, std::vector < SPtr< Kernel>>& kernels) { ////////////////////////////////////////////////////////////////////////// - + if( level != para->getFine() ) { updateGrid27(para, comm, cudaManager, pm, level+1, t, kernels); @@ -26,35 +26,35 @@ void updateGrid27(Parameter* para, } ////////////////////////////////////////////////////////////////////////// - + collision(para, pm, level, t, kernels); - + ////////////////////////////////////////////////////////////////////////// - + exchangeMultiGPU(para, comm, cudaManager, level); - + ////////////////////////////////////////////////////////////////////////// - + postCollisionBC(para, level, t); - + ////////////////////////////////////////////////////////////////////////// swapBetweenEvenAndOddTimestep(para, level); ////////////////////////////////////////////////////////////////////////// - - if (para->getUseWale()) + + if (para->getUseWale()) calcMacroscopicQuantities(para, level); if (para->getUseTurbulentViscosity()) calcTurbulentViscosity(para, level); - - ////////////////////////////////////////////////////////////////////////// - + + ////////////////////////////////////////////////////////////////////////// + preCollisionBC(para, cudaManager, level, t); - + ////////////////////////////////////////////////////////////////////////// - + if( level != para->getFine() ) { fineToCoarse(para, level); @@ -63,10 +63,11 @@ void updateGrid27(Parameter* para, coarseToFine(para, level); } - + interactWithActuators(para, cudaManager, level, t); - + interactWithProbes(para, cudaManager, level, t); + ////////////////////////////////////////////////////////////////////////// } void collision(Parameter* para, std::vector<std::shared_ptr<PorousMedia>>& pm, int level, unsigned int t, std::vector < SPtr< Kernel>>& kernels) @@ -274,7 +275,6 @@ void postCollisionBC(Parameter* para, int level, unsigned int t) 
////////////////////////////////////////////////////////////////////////// // S L I P ////////////////////////////////////////////////////////////////////////// - if (para->getParD(level)->kSlipQ > 0) { //QSlipDev27( para->getParD(level)->numberofthreads, para->getParD(level)->d0SP.f[0], para->getParD(level)->QSlip.k, @@ -286,10 +286,46 @@ void postCollisionBC(Parameter* para, int level, unsigned int t) QSlipDevComp27( para->getParD(level)->numberofthreads, para->getParD(level)->d0SP.f[0], para->getParD(level)->QSlip.k, para->getParD(level)->QSlip.q27[0], para->getParD(level)->kSlipQ, para->getParD(level)->omega, para->getParD(level)->neighborX_SP, para->getParD(level)->neighborY_SP, para->getParD(level)->neighborZ_SP, + para->getParD(level)->turbViscosity, para->getUseTurbulentViscosity(), para->getParD(level)->size_Mat_SP, para->getParD(level)->evenOrOdd); getLastCudaError("QSlipDev27 execution failed"); } + ////////////////////////////////////////////////////////////////////////// + // S T R E S S (wall model) + ////////////////////////////////////////////////////////////////////////// + if (para->getParD(level)->kStressQ > 0) + { + // QStressDevComp27( para->getParD(level)->numberofthreads, para->getParD(level)->d0SP.f[0], + // para->getParD(level)->QStress.k, para->getParD(level)->QStress.kN, + // para->getParD(level)->QStress.q27[0], para->getParD(level)->kStressQ, + // para->getParD(level)->omega, para->getParD(level)->turbViscosity, + // para->getParD(level)->vx_SP, para->getParD(level)->vy_SP, para->getParD(level)->vy_SP, + // para->getParD(level)->QStress.normalX, para->getParD(level)->QStress.normalY, para->getParD(level)->QStress.normalZ, + // para->getParD(level)->QStress.Vx, para->getParD(level)->QStress.Vy, para->getParD(level)->QStress.Vz, + // para->getParD(level)->QStress.Vx1, para->getParD(level)->QStress.Vy1, para->getParD(level)->QStress.Vz1, + // para->getParD(level)->wallModel.samplingOffset, para->getParD(level)->wallModel.z0, + // 
para->getHasWallModelMonitor(), para->getParD(level)->wallModel.u_star, + // para->getParD(level)->wallModel.Fx, para->getParD(level)->wallModel.Fy, para->getParD(level)->wallModel.Fz, + // para->getParD(level)->neighborX_SP, para->getParD(level)->neighborY_SP, para->getParD(level)->neighborZ_SP, + // para->getParD(level)->size_Mat_SP, para->getParD(level)->evenOrOdd); + // getLastCudaError("QStressDevComp27 execution failed"); + + BBStressDev27( para->getParD(level)->numberofthreads, para->getParD(level)->d0SP.f[0], + para->getParD(level)->QStress.k, para->getParD(level)->QStress.kN, + para->getParD(level)->QStress.q27[0], para->getParD(level)->kStressQ, + para->getParD(level)->vx_SP, para->getParD(level)->vy_SP, para->getParD(level)->vy_SP, + para->getParD(level)->QStress.normalX, para->getParD(level)->QStress.normalY, para->getParD(level)->QStress.normalZ, + para->getParD(level)->QStress.Vx, para->getParD(level)->QStress.Vy, para->getParD(level)->QStress.Vz, + para->getParD(level)->QStress.Vx1, para->getParD(level)->QStress.Vy1, para->getParD(level)->QStress.Vz1, + para->getParD(level)->wallModel.samplingOffset, para->getParD(level)->wallModel.z0, + para->getHasWallModelMonitor(), para->getParD(level)->wallModel.u_star, + para->getParD(level)->wallModel.Fx, para->getParD(level)->wallModel.Fy, para->getParD(level)->wallModel.Fz, + para->getParD(level)->neighborX_SP, para->getParD(level)->neighborY_SP, para->getParD(level)->neighborZ_SP, + para->getParD(level)->size_Mat_SP, para->getParD(level)->evenOrOdd); + getLastCudaError("BBStressDevice27 execution failed"); + } + ////////////////////////////////////////////////////////////////////////// // G E O M E T R Y ////////////////////////////////////////////////////////////////////////// diff --git a/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderGenerator/GridGenerator.cpp b/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderGenerator/GridGenerator.cpp index 
f165666f56617bb9d47a9c37a2fe8f5629511b35..9f2bfa4d2ac004237d7a7e62d04496089b05db61 100644 --- a/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderGenerator/GridGenerator.cpp +++ b/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderGenerator/GridGenerator.cpp @@ -112,6 +112,53 @@ void GridGenerator::allocArrays_BoundaryValues() cudaMemoryManager->cudaCopyPress(level); } } + + for (uint level = 0; level < builder->getNumberOfGridLevels(); level++) { + const auto numberOfSlipValues = int(builder->getSlipSize(level)); + + std::cout << "size slip level " << level << " : " << numberOfSlipValues << std::endl; + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + para->getParH(level)->QSlip.kQ = numberOfSlipValues; + para->getParD(level)->QSlip.kQ = numberOfSlipValues; + para->getParH(level)->kSlipQ = numberOfSlipValues; + para->getParD(level)->kSlipQ = numberOfSlipValues; + para->getParH(level)->kSlipQread = numberOfSlipValues * para->getD3Qxx(); + para->getParD(level)->kSlipQread = numberOfSlipValues * para->getD3Qxx(); + if (numberOfSlipValues > 1) + { + cudaMemoryManager->cudaAllocSlipBC(level); + builder->getSlipValues(para->getParH(level)->QSlip.normalX, para->getParH(level)->QSlip.normalY, para->getParH(level)->QSlip.normalZ, para->getParH(level)->QSlip.k, level); + cudaMemoryManager->cudaCopySlipBC(level); + } + } + + for (uint level = 0; level < builder->getNumberOfGridLevels(); level++) { + const auto numberOfStressValues = int(builder->getStressSize(level)); + + std::cout << "size stress level " << level << " : " << numberOfStressValues << std::endl; + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + para->getParH(level)->QStress.kQ = numberOfStressValues; + para->getParD(level)->QStress.kQ = numberOfStressValues; + 
para->getParH(level)->kStressQ = numberOfStressValues; + para->getParD(level)->kStressQ = numberOfStressValues; + para->getParH(level)->kStressQread = numberOfStressValues * para->getD3Qxx(); + para->getParD(level)->kStressQread = numberOfStressValues * para->getD3Qxx(); + + if (numberOfStressValues > 1) + { + cudaMemoryManager->cudaAllocStressBC(level); + cudaMemoryManager->cudaAllocWallModel(level, para->getHasWallModelMonitor()); + builder->getStressValues( para->getParH(level)->QStress.normalX, para->getParH(level)->QStress.normalY, para->getParH(level)->QStress.normalZ, + para->getParH(level)->QStress.Vx, para->getParH(level)->QStress.Vy, para->getParH(level)->QStress.Vz, + para->getParH(level)->QStress.Vx1, para->getParH(level)->QStress.Vy1, para->getParH(level)->QStress.Vz1, + para->getParH(level)->QStress.k, para->getParH(level)->QStress.kN, + para->getParH(level)->wallModel.samplingOffset, para->getParH(level)->wallModel.z0, + level); + + cudaMemoryManager->cudaCopyStressBC(level); + cudaMemoryManager->cudaCopyWallModel(level, para->getHasWallModelMonitor()); + } + } for (uint level = 0; level < builder->getNumberOfGridLevels(); level++) { @@ -137,18 +184,6 @@ void GridGenerator::allocArrays_BoundaryValues() builder->getVelocityValues(para->getParH(level)->Qinflow.Vx, para->getParH(level)->Qinflow.Vy, para->getParH(level)->Qinflow.Vz, para->getParH(level)->Qinflow.k, level); - - //for (int i = 0; i < numberOfVelocityValues; i++) - //{ - // std::cout << "index: " << para->getParH(level)->Qinflow.k[i]; - // std::cout << " (x,y,z)" << para->getParH(level)->coordX_SP[para->getParH(level)->Qinflow.k[i]]; - // std::cout << ", " << para->getParH(level)->coordY_SP[para->getParH(level)->Qinflow.k[i]]; - // std::cout << ", " << para->getParH(level)->coordZ_SP[para->getParH(level)->Qinflow.k[i]]; - // std::cout << " geo: " << para->getParH(level)->geoSP[para->getParH(level)->Qinflow.k[i]]; - // std::cout << std::endl; - //} - - 
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// cudaMemoryManager->cudaCopyVeloBC(level); @@ -697,7 +732,97 @@ void GridGenerator::allocArrays_BoundaryQs() }//ende if }//ende oberste for schleife + for (uint i = 0; i < builder->getNumberOfGridLevels(); i++) { + int numberOfSlipValues = (int)builder->getSlipSize(i); + if (numberOfSlipValues > 0) + { + std::cout << "size Slip: " << i << " : " << numberOfSlipValues << std::endl; + //cout << "Groesse Pressure: " << i << " : " << temp1 << "MyID: " << para->getMyID() << endl; + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + //preprocessing + real* QQ = para->getParH(i)->QSlip.q27[0]; + unsigned int sizeQ = para->getParH(i)->QSlip.kQ; + QforBoundaryConditions Q; + Q.q27[dirE] = &QQ[dirE *sizeQ]; + Q.q27[dirW] = &QQ[dirW *sizeQ]; + Q.q27[dirN] = &QQ[dirN *sizeQ]; + Q.q27[dirS] = &QQ[dirS *sizeQ]; + Q.q27[dirT] = &QQ[dirT *sizeQ]; + Q.q27[dirB] = &QQ[dirB *sizeQ]; + Q.q27[dirNE] = &QQ[dirNE *sizeQ]; + Q.q27[dirSW] = &QQ[dirSW *sizeQ]; + Q.q27[dirSE] = &QQ[dirSE *sizeQ]; + Q.q27[dirNW] = &QQ[dirNW *sizeQ]; + Q.q27[dirTE] = &QQ[dirTE *sizeQ]; + Q.q27[dirBW] = &QQ[dirBW *sizeQ]; + Q.q27[dirBE] = &QQ[dirBE *sizeQ]; + Q.q27[dirTW] = &QQ[dirTW *sizeQ]; + Q.q27[dirTN] = &QQ[dirTN *sizeQ]; + Q.q27[dirBS] = &QQ[dirBS *sizeQ]; + Q.q27[dirBN] = &QQ[dirBN *sizeQ]; + Q.q27[dirTS] = &QQ[dirTS *sizeQ]; + Q.q27[dirZERO] = &QQ[dirZERO*sizeQ]; + Q.q27[dirTNE] = &QQ[dirTNE *sizeQ]; + Q.q27[dirTSW] = &QQ[dirTSW *sizeQ]; + Q.q27[dirTSE] = &QQ[dirTSE *sizeQ]; + Q.q27[dirTNW] = &QQ[dirTNW *sizeQ]; + Q.q27[dirBNE] = &QQ[dirBNE *sizeQ]; + Q.q27[dirBSW] = &QQ[dirBSW *sizeQ]; + Q.q27[dirBSE] = &QQ[dirBSE *sizeQ]; + Q.q27[dirBNW] = &QQ[dirBNW *sizeQ]; + + builder->getSlipQs(Q.q27, i); + 
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + cudaMemoryManager->cudaCopySlipBC(i); + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + }//ende if + }//ende oberste for schleife + for (uint i = 0; i < builder->getNumberOfGridLevels(); i++) { + int numberOfStressValues = (int)builder->getStressSize(i); + if (numberOfStressValues > 0) + { + std::cout << "size Stress: " << i << " : " << numberOfStressValues << std::endl; + //cout << "Groesse Pressure: " << i << " : " << temp1 << "MyID: " << para->getMyID() << endl; + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + //preprocessing + real* QQ = para->getParH(i)->QStress.q27[0]; + unsigned int sizeQ = para->getParH(i)->QStress.kQ; + QforBoundaryConditions Q; + Q.q27[dirE] = &QQ[dirE *sizeQ]; + Q.q27[dirW] = &QQ[dirW *sizeQ]; + Q.q27[dirN] = &QQ[dirN *sizeQ]; + Q.q27[dirS] = &QQ[dirS *sizeQ]; + Q.q27[dirT] = &QQ[dirT *sizeQ]; + Q.q27[dirB] = &QQ[dirB *sizeQ]; + Q.q27[dirNE] = &QQ[dirNE *sizeQ]; + Q.q27[dirSW] = &QQ[dirSW *sizeQ]; + Q.q27[dirSE] = &QQ[dirSE *sizeQ]; + Q.q27[dirNW] = &QQ[dirNW *sizeQ]; + Q.q27[dirTE] = &QQ[dirTE *sizeQ]; + Q.q27[dirBW] = &QQ[dirBW *sizeQ]; + Q.q27[dirBE] = &QQ[dirBE *sizeQ]; + Q.q27[dirTW] = &QQ[dirTW *sizeQ]; + Q.q27[dirTN] = &QQ[dirTN *sizeQ]; + Q.q27[dirBS] = &QQ[dirBS *sizeQ]; + Q.q27[dirBN] = &QQ[dirBN *sizeQ]; + Q.q27[dirTS] = &QQ[dirTS *sizeQ]; + Q.q27[dirZERO] = &QQ[dirZERO*sizeQ]; + Q.q27[dirTNE] = &QQ[dirTNE *sizeQ]; + Q.q27[dirTSW] = &QQ[dirTSW *sizeQ]; + Q.q27[dirTSE] = &QQ[dirTSE *sizeQ]; + Q.q27[dirTNW] = &QQ[dirTNW *sizeQ]; + Q.q27[dirBNE] = &QQ[dirBNE *sizeQ]; + Q.q27[dirBSW] = &QQ[dirBSW *sizeQ]; + Q.q27[dirBSE] = &QQ[dirBSE *sizeQ]; + Q.q27[dirBNW] = &QQ[dirBNW *sizeQ]; 
+ + builder->getStressQs(Q.q27, i); + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + cudaMemoryManager->cudaCopyStressBC(i); + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + }//ende if + }//ende oberste for schleife for (uint i = 0; i < builder->getNumberOfGridLevels(); i++) { const auto numberOfVelocityNodes = int(builder->getVelocitySize(i)); diff --git a/src/gpu/VirtualFluids_GPU/GPU/CudaMemoryManager.cpp b/src/gpu/VirtualFluids_GPU/GPU/CudaMemoryManager.cpp index 6685a34b98c03067426db7452b63c060e1723058..4e8eb124731cffb54a51018fa6f06da45f671c73 100644 --- a/src/gpu/VirtualFluids_GPU/GPU/CudaMemoryManager.cpp +++ b/src/gpu/VirtualFluids_GPU/GPU/CudaMemoryManager.cpp @@ -1355,15 +1355,19 @@ void CudaMemoryManager::cudaAllocSlipBC(int lev) //Host checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->QSlip.q27[0]), parameter->getD3Qxx()*mem_size_Q_q )); checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->QSlip.k), mem_size_Q_k )); - //checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->QSlip.qread), mem_size_Q_q_read ));//Geller - //checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->QSlip.valueQ), mem_size_Q_value ));//Geller + checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->QSlip.normalX), mem_size_Q_q )); + checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->QSlip.normalY), mem_size_Q_q )); + checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->QSlip.normalZ), mem_size_Q_q )); //Device checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->QSlip.q27[0]), parameter->getD3Qxx()* mem_size_Q_q )); checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->QSlip.k), mem_size_Q_k )); + checkCudaErrors( cudaMalloc((void**) 
&(parameter->getParD(lev)->QSlip.normalX), mem_size_Q_q )); + checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->QSlip.normalY), mem_size_Q_q )); + checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->QSlip.normalZ), mem_size_Q_q )); ////////////////////////////////////////////////////////////////////////// - double tmp = (double)mem_size_Q_k + (double)parameter->getD3Qxx()*(double)mem_size_Q_q; + double tmp = (double)mem_size_Q_k + (double)parameter->getD3Qxx()*(double)mem_size_Q_q + 3.0*(double)mem_size_Q_q;; setMemsizeGPU(tmp, false); } void CudaMemoryManager::cudaCopySlipBC(int lev) @@ -1371,15 +1375,150 @@ void CudaMemoryManager::cudaCopySlipBC(int lev) unsigned int mem_size_Q_k = sizeof(int)*parameter->getParH(lev)->QSlip.kQ; unsigned int mem_size_Q_q = sizeof(real)*parameter->getParH(lev)->QSlip.kQ; - checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->QSlip.q27[0], parameter->getParH(lev)->QSlip.q27[0], parameter->getD3Qxx()* mem_size_Q_q, cudaMemcpyHostToDevice)); - checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->QSlip.k, parameter->getParH(lev)->QSlip.k, mem_size_Q_k, cudaMemcpyHostToDevice)); + checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->QSlip.q27[0], parameter->getParH(lev)->QSlip.q27[0], parameter->getD3Qxx()* mem_size_Q_q, cudaMemcpyHostToDevice)); + checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->QSlip.k, parameter->getParH(lev)->QSlip.k, mem_size_Q_k, cudaMemcpyHostToDevice)); + checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->QSlip.normalX, parameter->getParH(lev)->QSlip.normalX, mem_size_Q_q, cudaMemcpyHostToDevice)); + checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->QSlip.normalY, parameter->getParH(lev)->QSlip.normalY, mem_size_Q_q, cudaMemcpyHostToDevice)); + checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->QSlip.normalZ, parameter->getParH(lev)->QSlip.normalZ, mem_size_Q_q, cudaMemcpyHostToDevice)); } void CudaMemoryManager::cudaFreeSlipBC(int lev) { checkCudaErrors( 
cudaFreeHost(parameter->getParH(lev)->QSlip.q27[0])); checkCudaErrors( cudaFreeHost(parameter->getParH(lev)->QSlip.k)); - //checkCudaErrors( cudaFreeHost(parameter->getParH(lev)->QSlip.valueQ)); - //checkCudaErrors( cudaFreeHost(parameter->getParH(lev)->QSlip.qread)); + checkCudaErrors( cudaFreeHost(parameter->getParH(lev)->QSlip.normalX)); + checkCudaErrors( cudaFreeHost(parameter->getParH(lev)->QSlip.normalY)); + checkCudaErrors( cudaFreeHost(parameter->getParH(lev)->QSlip.normalZ)); +} +//Stress +void CudaMemoryManager::cudaAllocStressBC(int lev) +{ + unsigned int mem_size_Q_k = sizeof(int)*parameter->getParH(lev)->QStress.kQ; + unsigned int mem_size_Q_q = sizeof(real)*parameter->getParH(lev)->QStress.kQ; + + //Host + checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->QStress.q27[0]), parameter->getD3Qxx()*mem_size_Q_q )); + checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->QStress.k), mem_size_Q_k )); + checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->QStress.kN), mem_size_Q_k )); + checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->QStress.normalX), mem_size_Q_q )); + checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->QStress.normalY), mem_size_Q_q )); + checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->QStress.normalZ), mem_size_Q_q )); + checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->QStress.Vx), mem_size_Q_q )); + checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->QStress.Vy), mem_size_Q_q )); + checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->QStress.Vz), mem_size_Q_q )); + checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->QStress.Vx1), mem_size_Q_q )); + checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->QStress.Vy1), mem_size_Q_q )); + checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->QStress.Vz1), mem_size_Q_q )); + + //Device + 
checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->QStress.q27[0]), parameter->getD3Qxx()* mem_size_Q_q )); + checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->QStress.k), mem_size_Q_k )); + checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->QStress.kN), mem_size_Q_k )); + checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->QStress.normalX), mem_size_Q_q )); + checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->QStress.normalY), mem_size_Q_q )); + checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->QStress.normalZ), mem_size_Q_q )); + checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->QStress.Vx), mem_size_Q_q )); + checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->QStress.Vy), mem_size_Q_q )); + checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->QStress.Vz), mem_size_Q_q )); + checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->QStress.Vx1), mem_size_Q_q )); + checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->QStress.Vy1), mem_size_Q_q )); + checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->QStress.Vz1), mem_size_Q_q )); + + ////////////////////////////////////////////////////////////////////////// + double tmp = 2*(double)mem_size_Q_k + (double)parameter->getD3Qxx()*(double)mem_size_Q_q + 9.0*(double)mem_size_Q_q; + setMemsizeGPU(tmp, false); +} +void CudaMemoryManager::cudaCopyStressBC(int lev) +{ + unsigned int mem_size_Q_k = sizeof(int)*parameter->getParH(lev)->QStress.kQ; + unsigned int mem_size_Q_q = sizeof(real)*parameter->getParH(lev)->QStress.kQ; + + checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->QStress.q27[0], parameter->getParH(lev)->QStress.q27[0], parameter->getD3Qxx()* mem_size_Q_q, cudaMemcpyHostToDevice)); + checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->QStress.k, parameter->getParH(lev)->QStress.k, mem_size_Q_k, cudaMemcpyHostToDevice)); + checkCudaErrors( 
cudaMemcpy(parameter->getParD(lev)->QStress.kN, parameter->getParH(lev)->QStress.kN, mem_size_Q_k, cudaMemcpyHostToDevice)); + checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->QStress.normalX, parameter->getParH(lev)->QStress.normalX, mem_size_Q_q, cudaMemcpyHostToDevice)); + checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->QStress.normalY, parameter->getParH(lev)->QStress.normalY, mem_size_Q_q, cudaMemcpyHostToDevice)); + checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->QStress.normalZ, parameter->getParH(lev)->QStress.normalZ, mem_size_Q_q, cudaMemcpyHostToDevice)); + checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->QStress.Vx, parameter->getParH(lev)->QStress.Vx, mem_size_Q_q, cudaMemcpyHostToDevice)); + checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->QStress.Vy, parameter->getParH(lev)->QStress.Vy, mem_size_Q_q, cudaMemcpyHostToDevice)); + checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->QStress.Vz, parameter->getParH(lev)->QStress.Vz, mem_size_Q_q, cudaMemcpyHostToDevice)); + checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->QStress.Vx1, parameter->getParH(lev)->QStress.Vx1, mem_size_Q_q, cudaMemcpyHostToDevice)); + checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->QStress.Vy1, parameter->getParH(lev)->QStress.Vy1, mem_size_Q_q, cudaMemcpyHostToDevice)); + checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->QStress.Vz1, parameter->getParH(lev)->QStress.Vz1, mem_size_Q_q, cudaMemcpyHostToDevice)); + +} +void CudaMemoryManager::cudaFreeStressBC(int lev) +{ + checkCudaErrors( cudaFreeHost(parameter->getParH(lev)->QStress.q27[0])); + checkCudaErrors( cudaFreeHost(parameter->getParH(lev)->QStress.k)); + checkCudaErrors( cudaFreeHost(parameter->getParH(lev)->QStress.kN)); + checkCudaErrors( cudaFreeHost(parameter->getParH(lev)->QStress.normalX)); + checkCudaErrors( cudaFreeHost(parameter->getParH(lev)->QStress.normalY)); + checkCudaErrors( cudaFreeHost(parameter->getParH(lev)->QStress.normalZ)); + checkCudaErrors( 
cudaFreeHost(parameter->getParH(lev)->QStress.Vx)); + checkCudaErrors( cudaFreeHost(parameter->getParH(lev)->QStress.Vy)); + checkCudaErrors( cudaFreeHost(parameter->getParH(lev)->QStress.Vz)); + checkCudaErrors( cudaFreeHost(parameter->getParH(lev)->QStress.Vx1)); + checkCudaErrors( cudaFreeHost(parameter->getParH(lev)->QStress.Vy1)); + checkCudaErrors( cudaFreeHost(parameter->getParH(lev)->QStress.Vz1)); +} +// Wall model +void CudaMemoryManager::cudaAllocWallModel(int lev, bool hasWallModelMonitor) +{ + unsigned int mem_size_Q_k = sizeof(int)*parameter->getParH(lev)->QStress.kQ; + unsigned int mem_size_Q_q = sizeof(real)*parameter->getParH(lev)->QStress.kQ; + + //Host + checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->wallModel.samplingOffset), mem_size_Q_k )); + checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->wallModel.z0), mem_size_Q_q )); + if(hasWallModelMonitor) + { + checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->wallModel.u_star), mem_size_Q_q )); + checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->wallModel.Fx), mem_size_Q_q )); + checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->wallModel.Fy), mem_size_Q_q )); + checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->wallModel.Fz), mem_size_Q_q )); + } + + //Device + checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->wallModel.samplingOffset), mem_size_Q_k)); + checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->wallModel.z0), mem_size_Q_q)); + if(hasWallModelMonitor) + { + checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->wallModel.u_star), mem_size_Q_q )); + checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->wallModel.Fx), mem_size_Q_q )); + checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->wallModel.Fy), mem_size_Q_q )); + checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->wallModel.Fz), mem_size_Q_q )); + } + + 
////////////////////////////////////////////////////////////////////////// + double tmp = (double)mem_size_Q_k + (double)mem_size_Q_q + (hasWallModelMonitor ? 4.0*(double)mem_size_Q_q : 0.0); // also count the four monitor arrays (u_star, Fx, Fy, Fz) when they are allocated on the device + setMemsizeGPU(tmp, false); +} +void CudaMemoryManager::cudaCopyWallModel(int lev, bool hasWallModelMonitor) // copies the wall-model arrays of level lev from host to device; u_star/Fx/Fy/Fz only when hasWallModelMonitor is true +{ + unsigned int mem_size_Q_k = sizeof(int)*parameter->getParH(lev)->QStress.kQ; // byte count of an int array with QStress.kQ entries + unsigned int mem_size_Q_q = sizeof(real)*parameter->getParH(lev)->QStress.kQ; // byte count of a real array with QStress.kQ entries + + checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->wallModel.samplingOffset, parameter->getParH(lev)->wallModel.samplingOffset, mem_size_Q_k, cudaMemcpyHostToDevice)); + checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->wallModel.z0, parameter->getParH(lev)->wallModel.z0, mem_size_Q_q, cudaMemcpyHostToDevice)); + if(hasWallModelMonitor) + { + checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->wallModel.u_star, parameter->getParH(lev)->wallModel.u_star, mem_size_Q_q, cudaMemcpyHostToDevice)); // u_star is allocated with the real-sized byte count, so it must be copied with it as well + checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->wallModel.Fx, parameter->getParH(lev)->wallModel.Fx, mem_size_Q_q, cudaMemcpyHostToDevice)); + checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->wallModel.Fy, parameter->getParH(lev)->wallModel.Fy, mem_size_Q_q, cudaMemcpyHostToDevice)); + checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->wallModel.Fz, parameter->getParH(lev)->wallModel.Fz, mem_size_Q_q, cudaMemcpyHostToDevice)); + } +} +void CudaMemoryManager::cudaFreeWallModel(int lev, bool hasWallModelMonitor) // frees the host-side wall-model arrays of level lev (cudaFreeHost only; no cudaFree is issued here) +{ + checkCudaErrors( cudaFreeHost(parameter->getParH(lev)->wallModel.samplingOffset)); + checkCudaErrors( cudaFreeHost(parameter->getParH(lev)->wallModel.z0)); + if(hasWallModelMonitor) + { + checkCudaErrors( cudaFreeHost(parameter->getParH(lev)->wallModel.u_star)); + checkCudaErrors( cudaFreeHost(parameter->getParH(lev)->wallModel.Fx)); + checkCudaErrors( cudaFreeHost(parameter->getParH(lev)->wallModel.Fy)); + checkCudaErrors( cudaFreeHost(parameter->getParH(lev)->wallModel.Fz)); + } } //Test roundoff error @@ -2875,18 +3014,18 @@ void 
CudaMemoryManager::cudaFreeProbeDistances(Probe* probe, int level) void CudaMemoryManager::cudaAllocProbeIndices(Probe* probe, int level) { - size_t tmp = sizeof(int)*probe->getProbeStruct(level)->nPoints; + size_t tmp = sizeof(int)*probe->getProbeStruct(level)->nIndices; checkCudaErrors( cudaMallocHost((void**) &probe->getProbeStruct(level)->pointIndicesH, tmp) ); checkCudaErrors( cudaMalloc ((void**) &probe->getProbeStruct(level)->pointIndicesD, tmp) ); setMemsizeGPU(1.f*tmp, false); } void CudaMemoryManager::cudaCopyProbeIndicesHtoD(Probe* probe, int level) { - checkCudaErrors( cudaMemcpy(probe->getProbeStruct(level)->pointIndicesD, probe->getProbeStruct(level)->pointIndicesH, sizeof(int)*probe->getProbeStruct(level)->nPoints, cudaMemcpyHostToDevice) ); + checkCudaErrors( cudaMemcpy(probe->getProbeStruct(level)->pointIndicesD, probe->getProbeStruct(level)->pointIndicesH, sizeof(int)*probe->getProbeStruct(level)->nIndices, cudaMemcpyHostToDevice) ); } void CudaMemoryManager::cudaCopyProbeIndicesDtoH(Probe* probe, int level) { - checkCudaErrors( cudaMemcpy(probe->getProbeStruct(level)->pointIndicesH, probe->getProbeStruct(level)->pointIndicesD, sizeof(int)*probe->getProbeStruct(level)->nPoints, cudaMemcpyDeviceToHost) ); + checkCudaErrors( cudaMemcpy(probe->getProbeStruct(level)->pointIndicesH, probe->getProbeStruct(level)->pointIndicesD, sizeof(int)*probe->getProbeStruct(level)->nIndices, cudaMemcpyDeviceToHost) ); } void CudaMemoryManager::cudaFreeProbeIndices(Probe* probe, int level) { @@ -2899,8 +3038,11 @@ void CudaMemoryManager::cudaAllocProbeQuantityArray(Probe* probe, int level) size_t tmp = sizeof(real)*probe->getProbeStruct(level)->nArrays*probe->getProbeStruct(level)->nPoints; checkCudaErrors( cudaMallocHost((void**) &probe->getProbeStruct(level)->quantitiesArrayH, tmp) ); - checkCudaErrors( cudaMalloc ((void**) &probe->getProbeStruct(level)->quantitiesArrayD, tmp) ); - setMemsizeGPU(1.f*tmp, false); + if(probe->getHasDeviceQuantityArray()) + { + 
checkCudaErrors( cudaMalloc ((void**) &probe->getProbeStruct(level)->quantitiesArrayD, tmp) ); + setMemsizeGPU(1.f*tmp, false); + } } void CudaMemoryManager::cudaCopyProbeQuantityArrayHtoD(Probe* probe, int level) @@ -2914,13 +3056,14 @@ void CudaMemoryManager::cudaCopyProbeQuantityArrayDtoH(Probe* probe, int level) void CudaMemoryManager::cudaFreeProbeQuantityArray(Probe* probe, int level) { checkCudaErrors( cudaFreeHost(probe->getProbeStruct(level)->quantitiesArrayH) ); - checkCudaErrors( cudaFree (probe->getProbeStruct(level)->quantitiesArrayD) ); + if(probe->getHasDeviceQuantityArray()) + checkCudaErrors( cudaFree (probe->getProbeStruct(level)->quantitiesArrayD) ); } void CudaMemoryManager::cudaAllocProbeQuantitiesAndOffsets(Probe* probe, int level) { - size_t tmpA = int(PostProcessingVariable::LAST)*sizeof(int); - size_t tmpQ = int(PostProcessingVariable::LAST)*sizeof(bool); + size_t tmpA = int(Statistic::LAST)*sizeof(int); + size_t tmpQ = int(Statistic::LAST)*sizeof(bool); checkCudaErrors( cudaMallocHost((void**) &probe->getProbeStruct(level)->quantitiesH, tmpQ) ); checkCudaErrors( cudaMalloc ((void**) &probe->getProbeStruct(level)->quantitiesD, tmpQ) ); checkCudaErrors( cudaMallocHost((void**) &probe->getProbeStruct(level)->arrayOffsetsH, tmpA) ); @@ -2930,14 +3073,14 @@ void CudaMemoryManager::cudaAllocProbeQuantitiesAndOffsets(Probe* probe, int lev void CudaMemoryManager::cudaCopyProbeQuantitiesAndOffsetsHtoD(Probe* probe, int level) { - checkCudaErrors( cudaMemcpy(probe->getProbeStruct(level)->quantitiesD, probe->getProbeStruct(level)->quantitiesH, int(PostProcessingVariable::LAST)*sizeof(bool), cudaMemcpyHostToDevice) ); - checkCudaErrors( cudaMemcpy(probe->getProbeStruct(level)->arrayOffsetsD, probe->getProbeStruct(level)->arrayOffsetsH, int(PostProcessingVariable::LAST)*sizeof(int), cudaMemcpyHostToDevice) ); + checkCudaErrors( cudaMemcpy(probe->getProbeStruct(level)->quantitiesD, probe->getProbeStruct(level)->quantitiesH, 
int(Statistic::LAST)*sizeof(bool), cudaMemcpyHostToDevice) ); + checkCudaErrors( cudaMemcpy(probe->getProbeStruct(level)->arrayOffsetsD, probe->getProbeStruct(level)->arrayOffsetsH, int(Statistic::LAST)*sizeof(int), cudaMemcpyHostToDevice) ); } void CudaMemoryManager::cudaCopyProbeQuantitiesAndOffsetsDtoH(Probe* probe, int level) { - checkCudaErrors( cudaMemcpy(probe->getProbeStruct(level)->quantitiesH, probe->getProbeStruct(level)->quantitiesD, int(PostProcessingVariable::LAST)*sizeof(bool), cudaMemcpyDeviceToHost) ); - checkCudaErrors( cudaMemcpy(probe->getProbeStruct(level)->arrayOffsetsH, probe->getProbeStruct(level)->arrayOffsetsD, int(PostProcessingVariable::LAST)*sizeof(int), cudaMemcpyDeviceToHost) ); + checkCudaErrors( cudaMemcpy(probe->getProbeStruct(level)->quantitiesH, probe->getProbeStruct(level)->quantitiesD, int(Statistic::LAST)*sizeof(bool), cudaMemcpyDeviceToHost) ); + checkCudaErrors( cudaMemcpy(probe->getProbeStruct(level)->arrayOffsetsH, probe->getProbeStruct(level)->arrayOffsetsD, int(Statistic::LAST)*sizeof(int), cudaMemcpyDeviceToHost) ); } void CudaMemoryManager::cudaFreeProbeQuantitiesAndOffsets(Probe* probe, int level) { diff --git a/src/gpu/VirtualFluids_GPU/GPU/CudaMemoryManager.h b/src/gpu/VirtualFluids_GPU/GPU/CudaMemoryManager.h index e81497cb4e5824afabc3303a984817fe3a2ff68b..27b16240cb63b4505017a7dc50e3a5fc9b19ce82 100644 --- a/src/gpu/VirtualFluids_GPU/GPU/CudaMemoryManager.h +++ b/src/gpu/VirtualFluids_GPU/GPU/CudaMemoryManager.h @@ -164,6 +164,14 @@ public: void cudaAllocSlipBC(int lev); void cudaCopySlipBC(int lev); void cudaFreeSlipBC(int lev); + + void cudaAllocStressBC(int lev); + void cudaCopyStressBC(int lev); + void cudaFreeStressBC(int lev); + + void cudaAllocWallModel(int lev, bool hasWallModelMonitor); + void cudaCopyWallModel(int lev, bool hasWallModelMonitor); + void cudaFreeWallModel(int lev, bool hasWallModelMonitor); void cudaAllocGeomValuesBC(int lev); void cudaCopyGeomValuesBC(int lev); diff --git 
a/src/gpu/VirtualFluids_GPU/GPU/GPU_Interface.h b/src/gpu/VirtualFluids_GPU/GPU/GPU_Interface.h index dfdbac44d197e55e3e78eb794692fd9443cb7ab6..f7b89610d09cec436ebc6cb0e4473dbf6245c847 100644 --- a/src/gpu/VirtualFluids_GPU/GPU/GPU_Interface.h +++ b/src/gpu/VirtualFluids_GPU/GPU/GPU_Interface.h @@ -963,6 +963,8 @@ extern "C" void QSlipDevComp27(unsigned int numberOfThreads, unsigned int* neighborX, unsigned int* neighborY, unsigned int* neighborZ, + real* turbViscosity, + bool useTurbViscosity, unsigned int size_Mat, bool evenOrOdd); @@ -996,6 +998,70 @@ extern "C" void QSlipNormDevComp27(unsigned int numberOfThreads, unsigned int size_Mat, bool evenOrOdd); +extern "C" void QStressDevComp27(unsigned int numberOfThreads, + real* DD, + int* k_Q, + int* k_N, + real* QQ, + unsigned int sizeQ, + real om1, + real* turbViscosity, + real* vx, + real* vy, + real* vz, + real* normalX, + real* normalY, + real* normalZ, + real* vx_el, + real* vy_el, + real* vz_el, + real* vx_w_mean, + real* vy_w_mean, + real* vz_w_mean, + int* samplingOffset, + real* z0, + bool hasWallModelMonitor, + real* u_star, + real* Fx, + real* Fy, + real* Fz, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + unsigned int size_Mat, + bool evenOrOdd); + +extern "C" void BBStressDev27( unsigned int numberOfThreads, + real* DD, + int* k_Q, + int* k_N, + real* QQ, + unsigned int sizeQ, + real* vx, + real* vy, + real* vz, + real* normalX, + real* normalY, + real* normalZ, + real* vx_el, + real* vy_el, + real* vz_el, + real* vx_w_mean, + real* vy_w_mean, + real* vz_w_mean, + int* samplingOffset, + real* z0, + bool hasWallModelMonitor, + real* u_star, + real* Fx, + real* Fy, + real* Fz, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + unsigned int size_Mat, + bool evenOrOdd); + extern "C" void QPressDev27(unsigned int numberOfThreads, int nx, int ny, diff --git a/src/gpu/VirtualFluids_GPU/GPU/GPU_Kernels.cuh 
b/src/gpu/VirtualFluids_GPU/GPU/GPU_Kernels.cuh index 288db43e7bcd36dc4d187982b86178d345601094..d7d38baf2bcf6f5d3abe342359b7676f4ad8266b 100644 --- a/src/gpu/VirtualFluids_GPU/GPU/GPU_Kernels.cuh +++ b/src/gpu/VirtualFluids_GPU/GPU/GPU_Kernels.cuh @@ -916,6 +916,18 @@ extern "C" __global__ void QSlipDeviceComp27(real* DD, unsigned int size_Mat, bool evenOrOdd); +extern "C" __global__ void QSlipDeviceComp27TurbViscosity(real* DD, + int* k_Q, + real* QQ, + unsigned int sizeQ, + real om1, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + real* turbViscosity, + unsigned int size_Mat, + bool evenOrOdd); + extern "C" __global__ void QSlipGeomDeviceComp27(real* DD, int* k_Q, real* QQ, @@ -944,6 +956,69 @@ extern "C" __global__ void QSlipNormDeviceComp27(real* DD, unsigned int size_Mat, bool evenOrOdd); +// Stress BCs (wall model) +extern "C" __global__ void QStressDeviceComp27(real* DD, + int* k_Q, + int* k_N, + real* QQ, + unsigned int sizeQ, + real om1, + real* turbViscosity, + real* vx, + real* vy, + real* vz, + real* normalX, + real* normalY, + real* normalZ, + real* vx_bc, + real* vy_bc, + real* vz_bc, + real* vx1, + real* vy1, + real* vz1, + int* samplingOffset, + real* z0, + bool hasWallModelMonitor, + real* u_star_monitor, + real* Fx_monitor, + real* Fy_monitor, + real* Fz_monitor, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + unsigned int size_Mat, + bool evenOrOdd); + +extern "C" __global__ void BBStressDevice27( real* DD, + int* k_Q, + int* k_N, + real* QQ, + unsigned int sizeQ, + real* vx, + real* vy, + real* vz, + real* normalX, + real* normalY, + real* normalZ, + real* vx_bc, + real* vy_bc, + real* vz_bc, + real* vx1, + real* vy1, + real* vz1, + int* samplingOffset, + real* z0, + bool hasWallModelMonitor, + real* u_star_monitor, + real* Fx_monitor, + real* Fy_monitor, + real* Fz_monitor, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + unsigned int 
size_Mat, + bool evenOrOdd); + //Pressure BCs extern "C" __global__ void QPressDevice27(int inx, int iny, diff --git a/src/gpu/VirtualFluids_GPU/GPU/LBMKernel.cu b/src/gpu/VirtualFluids_GPU/GPU/LBMKernel.cu index 8b80e5ea9dbed3deef4c4332b2d43bf62ba9e48b..4dce487fc98ee077798f7f75bfbf96906e7585b0 100644 --- a/src/gpu/VirtualFluids_GPU/GPU/LBMKernel.cu +++ b/src/gpu/VirtualFluids_GPU/GPU/LBMKernel.cu @@ -3604,6 +3604,8 @@ extern "C" void QSlipDevComp27(unsigned int numberOfThreads, unsigned int* neighborX, unsigned int* neighborY, unsigned int* neighborZ, + real* turbViscosity, + bool useTurbViscosity, unsigned int size_Mat, bool evenOrOdd) { @@ -3621,7 +3623,24 @@ extern "C" void QSlipDevComp27(unsigned int numberOfThreads, } dim3 gridQ(Grid1, Grid2); dim3 threads(numberOfThreads, 1, 1 ); - + + if(useTurbViscosity) + { + QSlipDeviceComp27TurbViscosity<<< gridQ, threads >>> (DD, + k_Q, + QQ, + sizeQ, + om1, + neighborX, + neighborY, + neighborZ, + turbViscosity, + size_Mat, + evenOrOdd); + getLastCudaError("QSlipDeviceComp27TurbViscosity execution failed"); + } + else + { QSlipDeviceComp27<<< gridQ, threads >>> (DD, k_Q, QQ, @@ -3632,7 +3651,8 @@ extern "C" void QSlipDevComp27(unsigned int numberOfThreads, neighborZ, size_Mat, evenOrOdd); - getLastCudaError("QSlipDeviceComp27 execution failed"); + getLastCudaError("QSlipDeviceComp27 execution failed"); + } } ////////////////////////////////////////////////////////////////////////// extern "C" void QSlipGeomDevComp27(unsigned int numberOfThreads, @@ -3727,6 +3747,167 @@ extern "C" void QSlipNormDevComp27(unsigned int numberOfThreads, getLastCudaError("QSlipGeomDeviceComp27 execution failed"); } ////////////////////////////////////////////////////////////////////////// +extern "C" void QStressDevComp27(unsigned int numberOfThreads, + real* DD, + int* k_Q, + int* k_N, + real* QQ, + unsigned int sizeQ, + real om1, + real* turbViscosity, + real* vx, + real* vy, + real* vz, + real* normalX, + real* normalY, + real* normalZ, 
+ real* vx_bc, + real* vy_bc, + real* vz_bc, + real* vx1, + real* vy1, + real* vz1, + int* samplingOffset, + real* z0, + bool hasWallModelMonitor, + real* u_star, + real* Fx, + real* Fy, + real* Fz, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + unsigned int size_Mat, + bool evenOrOdd) +{ + int Grid = (sizeQ / numberOfThreads)+1; // enough thread blocks to cover all sizeQ boundary entries + int Grid1, Grid2; + if (Grid>512) + { + Grid1 = 512; + Grid2 = (Grid/Grid1)+1; + } + else + { + Grid1 = 1; + Grid2 = Grid; + } + dim3 gridQ(Grid1, Grid2); + dim3 threads(numberOfThreads, 1, 1 ); + + QStressDeviceComp27<<< gridQ, threads >>> (DD, + k_Q, + k_N, + QQ, + sizeQ, + om1, + turbViscosity, + vx, + vy, + vz, + normalX, + normalY, + normalZ, + vx_bc, + vy_bc, + vz_bc, + vx1, + vy1, + vz1, + samplingOffset, + z0, + hasWallModelMonitor, + u_star, + Fx, + Fy, + Fz, + neighborX, + neighborY, + neighborZ, + size_Mat, + evenOrOdd); + getLastCudaError("QStressDeviceComp27 execution failed"); // report launch errors under the name of the kernel that was actually launched +} + +////////////////////////////////////////////////////////////////////////// +extern "C" void BBStressDev27(unsigned int numberOfThreads, + real* DD, + int* k_Q, + int* k_N, + real* QQ, + unsigned int sizeQ, + real* vx, + real* vy, + real* vz, + real* normalX, + real* normalY, + real* normalZ, + real* vx_bc, + real* vy_bc, + real* vz_bc, + real* vx1, + real* vy1, + real* vz1, + int* samplingOffset, + real* z0, + bool hasWallModelMonitor, + real* u_star, + real* Fx, + real* Fy, + real* Fz, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + unsigned int size_Mat, + bool evenOrOdd) +{ + int Grid = (sizeQ / numberOfThreads)+1; + int Grid1, Grid2; + if (Grid>512) + { + Grid1 = 512; + Grid2 = (Grid/Grid1)+1; + } + else + { + Grid1 = 1; + Grid2 = Grid; + } + dim3 gridQ(Grid1, Grid2); + dim3 threads(numberOfThreads, 1, 1 ); + + BBStressDevice27<<< gridQ, threads >>> (DD, + k_Q, + k_N, + QQ, + sizeQ, + vx, + vy, + vz, + normalX, + normalY, + normalZ, + vx_bc, + vy_bc, + vz_bc, + vx1, + vy1, 
+ vz1, + samplingOffset, + z0, + hasWallModelMonitor, + u_star, + Fx, + Fy, + Fz, + neighborX, + neighborY, + neighborZ, + size_Mat, + evenOrOdd); + getLastCudaError("BBStressDevice27 execution failed"); +} +////////////////////////////////////////////////////////////////////////// extern "C" void QPressDev27(unsigned int numberOfThreads, int nx, int ny, diff --git a/src/gpu/VirtualFluids_GPU/GPU/SlipBCs27.cu b/src/gpu/VirtualFluids_GPU/GPU/SlipBCs27.cu index fc792a2c3a0f7438f4ee0882988a39f7260f21be..e5c017e6b1941f0a7b53e23c70698ccaf7a18987 100644 --- a/src/gpu/VirtualFluids_GPU/GPU/SlipBCs27.cu +++ b/src/gpu/VirtualFluids_GPU/GPU/SlipBCs27.cu @@ -803,6 +803,7 @@ extern "C" __global__ void QSlipDeviceComp27(real* DD, unsigned int kbne = kb; unsigned int ktne = KQK; unsigned int kbsw = neighborZ[ksw]; + //////////////////////////////////////////////////////////////////////////////// real f_W = (D.f[dirE ])[ke ]; real f_E = (D.f[dirW ])[kw ]; @@ -1076,6 +1077,7 @@ extern "C" __global__ void QSlipDeviceComp27(real* DD, VeloZ = fac*vx3; if (x == true) VeloX = c0o1; if (z == true) VeloZ = c0o1; + // if (k==10000) printf("AFTER x: %u \t y: %u \t z: %u \n VeloX: %f \t VeloY: %f \t VeloZ: %f \n\n", x,y,z, VeloX,VeloY,VeloZ); feq=c1o54* (drho/*+three*( vx1 +vx3)*/+c9o2*( vx1 +vx3)*( vx1 +vx3) * (c1o1 + drho)-cu_sq); (D.f[dirBW])[kbw]=(c1o1-q)/(c1o1+q)*(f_TE-f_BW+(f_TE+f_BW-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_TE+f_BW)-c6o1*c1o54*( VeloX+VeloZ))/(c1o1+q) - c1o54 * drho; //feq=c1over54* (drho+three*( vx1 +vx3)+c9over2*( vx1 +vx3)*( vx1 +vx3)-cu_sq); @@ -1318,7 +1320,673 @@ extern "C" __global__ void QSlipDeviceComp27(real* DD, } } //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +////////////////////////////////////////////////////////////////////////////// +extern "C" __global__ void QSlipDeviceComp27TurbViscosity(real* DD, + int* k_Q, + real* QQ, + unsigned int sizeQ, + real 
om1, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + real* turbViscosity, + unsigned int size_Mat, + bool evenOrOdd) +{ + Distributions27 D; + if (evenOrOdd==true) + { + D.f[dirE ] = &DD[dirE *size_Mat]; + D.f[dirW ] = &DD[dirW *size_Mat]; + D.f[dirN ] = &DD[dirN *size_Mat]; + D.f[dirS ] = &DD[dirS *size_Mat]; + D.f[dirT ] = &DD[dirT *size_Mat]; + D.f[dirB ] = &DD[dirB *size_Mat]; + D.f[dirNE ] = &DD[dirNE *size_Mat]; + D.f[dirSW ] = &DD[dirSW *size_Mat]; + D.f[dirSE ] = &DD[dirSE *size_Mat]; + D.f[dirNW ] = &DD[dirNW *size_Mat]; + D.f[dirTE ] = &DD[dirTE *size_Mat]; + D.f[dirBW ] = &DD[dirBW *size_Mat]; + D.f[dirBE ] = &DD[dirBE *size_Mat]; + D.f[dirTW ] = &DD[dirTW *size_Mat]; + D.f[dirTN ] = &DD[dirTN *size_Mat]; + D.f[dirBS ] = &DD[dirBS *size_Mat]; + D.f[dirBN ] = &DD[dirBN *size_Mat]; + D.f[dirTS ] = &DD[dirTS *size_Mat]; + D.f[dirZERO] = &DD[dirZERO*size_Mat]; + D.f[dirTNE ] = &DD[dirTNE *size_Mat]; + D.f[dirTSW ] = &DD[dirTSW *size_Mat]; + D.f[dirTSE ] = &DD[dirTSE *size_Mat]; + D.f[dirTNW ] = &DD[dirTNW *size_Mat]; + D.f[dirBNE ] = &DD[dirBNE *size_Mat]; + D.f[dirBSW ] = &DD[dirBSW *size_Mat]; + D.f[dirBSE ] = &DD[dirBSE *size_Mat]; + D.f[dirBNW ] = &DD[dirBNW *size_Mat]; + } + else + { + D.f[dirW ] = &DD[dirE *size_Mat]; + D.f[dirE ] = &DD[dirW *size_Mat]; + D.f[dirS ] = &DD[dirN *size_Mat]; + D.f[dirN ] = &DD[dirS *size_Mat]; + D.f[dirB ] = &DD[dirT *size_Mat]; + D.f[dirT ] = &DD[dirB *size_Mat]; + D.f[dirSW ] = &DD[dirNE *size_Mat]; + D.f[dirNE ] = &DD[dirSW *size_Mat]; + D.f[dirNW ] = &DD[dirSE *size_Mat]; + D.f[dirSE ] = &DD[dirNW *size_Mat]; + D.f[dirBW ] = &DD[dirTE *size_Mat]; + D.f[dirTE ] = &DD[dirBW *size_Mat]; + D.f[dirTW ] = &DD[dirBE *size_Mat]; + D.f[dirBE ] = &DD[dirTW *size_Mat]; + D.f[dirBS ] = &DD[dirTN *size_Mat]; + D.f[dirTN ] = &DD[dirBS *size_Mat]; + D.f[dirTS ] = &DD[dirBN *size_Mat]; + D.f[dirBN ] = &DD[dirTS *size_Mat]; + D.f[dirZERO] = &DD[dirZERO*size_Mat]; + D.f[dirTNE ] = &DD[dirBSW 
*size_Mat]; + D.f[dirTSW ] = &DD[dirBNE *size_Mat]; + D.f[dirTSE ] = &DD[dirBNW *size_Mat]; + D.f[dirTNW ] = &DD[dirBSE *size_Mat]; + D.f[dirBNE ] = &DD[dirTSW *size_Mat]; + D.f[dirBSW ] = &DD[dirTNE *size_Mat]; + D.f[dirBSE ] = &DD[dirTNW *size_Mat]; + D.f[dirBNW ] = &DD[dirTSE *size_Mat]; + } + //////////////////////////////////////////////////////////////////////////////// + const unsigned x = threadIdx.x; // Globaler x-Index + const unsigned y = blockIdx.x; // Globaler y-Index + const unsigned z = blockIdx.y; // Globaler z-Index + + const unsigned nx = blockDim.x; + const unsigned ny = gridDim.x; + + const unsigned k = nx*(ny*z + y) + x; + ////////////////////////////////////////////////////////////////////////// + + if(k<sizeQ) + { + //////////////////////////////////////////////////////////////////////////////// + real *q_dirE, *q_dirW, *q_dirN, *q_dirS, *q_dirT, *q_dirB, + *q_dirNE, *q_dirSW, *q_dirSE, *q_dirNW, *q_dirTE, *q_dirBW, + *q_dirBE, *q_dirTW, *q_dirTN, *q_dirBS, *q_dirBN, *q_dirTS, + *q_dirTNE, *q_dirTSW, *q_dirTSE, *q_dirTNW, *q_dirBNE, *q_dirBSW, + *q_dirBSE, *q_dirBNW; + q_dirE = &QQ[dirE *sizeQ]; + q_dirW = &QQ[dirW *sizeQ]; + q_dirN = &QQ[dirN *sizeQ]; + q_dirS = &QQ[dirS *sizeQ]; + q_dirT = &QQ[dirT *sizeQ]; + q_dirB = &QQ[dirB *sizeQ]; + q_dirNE = &QQ[dirNE *sizeQ]; + q_dirSW = &QQ[dirSW *sizeQ]; + q_dirSE = &QQ[dirSE *sizeQ]; + q_dirNW = &QQ[dirNW *sizeQ]; + q_dirTE = &QQ[dirTE *sizeQ]; + q_dirBW = &QQ[dirBW *sizeQ]; + q_dirBE = &QQ[dirBE *sizeQ]; + q_dirTW = &QQ[dirTW *sizeQ]; + q_dirTN = &QQ[dirTN *sizeQ]; + q_dirBS = &QQ[dirBS *sizeQ]; + q_dirBN = &QQ[dirBN *sizeQ]; + q_dirTS = &QQ[dirTS *sizeQ]; + q_dirTNE = &QQ[dirTNE *sizeQ]; + q_dirTSW = &QQ[dirTSW *sizeQ]; + q_dirTSE = &QQ[dirTSE *sizeQ]; + q_dirTNW = &QQ[dirTNW *sizeQ]; + q_dirBNE = &QQ[dirBNE *sizeQ]; + q_dirBSW = &QQ[dirBSW *sizeQ]; + q_dirBSE = &QQ[dirBSE *sizeQ]; + q_dirBNW = &QQ[dirBNW *sizeQ]; + //////////////////////////////////////////////////////////////////////////////// 
+ //index + unsigned int KQK = k_Q[k]; + unsigned int kzero= KQK; + unsigned int ke = KQK; + unsigned int kw = neighborX[KQK]; + unsigned int kn = KQK; + unsigned int ks = neighborY[KQK]; + unsigned int kt = KQK; + unsigned int kb = neighborZ[KQK]; + unsigned int ksw = neighborY[kw]; + unsigned int kne = KQK; + unsigned int kse = ks; + unsigned int knw = kw; + unsigned int kbw = neighborZ[kw]; + unsigned int kte = KQK; + unsigned int kbe = kb; + unsigned int ktw = kw; + unsigned int kbs = neighborZ[ks]; + unsigned int ktn = KQK; + unsigned int kbn = kb; + unsigned int kts = ks; + unsigned int ktse = ks; + unsigned int kbnw = kbw; + unsigned int ktnw = kw; + unsigned int kbse = kbs; + unsigned int ktsw = ksw; + unsigned int kbne = kb; + unsigned int ktne = KQK; + unsigned int kbsw = neighborZ[ksw]; + + //////////////////////////////////////////////////////////////////////////////// + real f_W = (D.f[dirE ])[ke ]; + real f_E = (D.f[dirW ])[kw ]; + real f_S = (D.f[dirN ])[kn ]; + real f_N = (D.f[dirS ])[ks ]; + real f_B = (D.f[dirT ])[kt ]; + real f_T = (D.f[dirB ])[kb ]; + real f_SW = (D.f[dirNE ])[kne ]; + real f_NE = (D.f[dirSW ])[ksw ]; + real f_NW = (D.f[dirSE ])[kse ]; + real f_SE = (D.f[dirNW ])[knw ]; + real f_BW = (D.f[dirTE ])[kte ]; + real f_TE = (D.f[dirBW ])[kbw ]; + real f_TW = (D.f[dirBE ])[kbe ]; + real f_BE = (D.f[dirTW ])[ktw ]; + real f_BS = (D.f[dirTN ])[ktn ]; + real f_TN = (D.f[dirBS ])[kbs ]; + real f_TS = (D.f[dirBN ])[kbn ]; + real f_BN = (D.f[dirTS ])[kts ]; + real f_BSW = (D.f[dirTNE ])[ktne ]; + real f_BNE = (D.f[dirTSW ])[ktsw ]; + real f_BNW = (D.f[dirTSE ])[ktse ]; + real f_BSE = (D.f[dirTNW ])[ktnw ]; + real f_TSW = (D.f[dirBNE ])[kbne ]; + real f_TNE = (D.f[dirBSW ])[kbsw ]; + real f_TNW = (D.f[dirBSE ])[kbse ]; + real f_TSE = (D.f[dirBNW ])[kbnw ]; + //////////////////////////////////////////////////////////////////////////////// + real vx1, vx2, vx3, drho, feq, q; + drho = f_TSE + f_TNW + f_TNE + f_TSW + f_BSE + f_BNW + f_BNE + f_BSW 
+ + f_BN + f_TS + f_TN + f_BS + f_BE + f_TW + f_TE + f_BW + f_SE + f_NW + f_NE + f_SW + + f_T + f_B + f_N + f_S + f_E + f_W + ((D.f[dirZERO])[kzero]); + + vx1 = (((f_TSE - f_BNW) - (f_TNW - f_BSE)) + ((f_TNE - f_BSW) - (f_TSW - f_BNE)) + + ((f_BE - f_TW) + (f_TE - f_BW)) + ((f_SE - f_NW) + (f_NE - f_SW)) + + (f_E - f_W)) / (c1o1 + drho); + + + vx2 = ((-(f_TSE - f_BNW) + (f_TNW - f_BSE)) + ((f_TNE - f_BSW) - (f_TSW - f_BNE)) + + ((f_BN - f_TS) + (f_TN - f_BS)) + (-(f_SE - f_NW) + (f_NE - f_SW)) + + (f_N - f_S)) / (c1o1 + drho); + + vx3 = (((f_TSE - f_BNW) + (f_TNW - f_BSE)) + ((f_TNE - f_BSW) + (f_TSW - f_BNE)) + + (-(f_BN - f_TS) + (f_TN - f_BS)) + ((f_TE - f_BW) - (f_BE - f_TW)) + + (f_T - f_B)) / (c1o1 + drho); + + real cu_sq=c3o2*(vx1*vx1+vx2*vx2+vx3*vx3) * (c1o1 + drho); + + ////////////////////////////////////////////////////////////////////////// + if (evenOrOdd==false) + { + D.f[dirE ] = &DD[dirE *size_Mat]; + D.f[dirW ] = &DD[dirW *size_Mat]; + D.f[dirN ] = &DD[dirN *size_Mat]; + D.f[dirS ] = &DD[dirS *size_Mat]; + D.f[dirT ] = &DD[dirT *size_Mat]; + D.f[dirB ] = &DD[dirB *size_Mat]; + D.f[dirNE ] = &DD[dirNE *size_Mat]; + D.f[dirSW ] = &DD[dirSW *size_Mat]; + D.f[dirSE ] = &DD[dirSE *size_Mat]; + D.f[dirNW ] = &DD[dirNW *size_Mat]; + D.f[dirTE ] = &DD[dirTE *size_Mat]; + D.f[dirBW ] = &DD[dirBW *size_Mat]; + D.f[dirBE ] = &DD[dirBE *size_Mat]; + D.f[dirTW ] = &DD[dirTW *size_Mat]; + D.f[dirTN ] = &DD[dirTN *size_Mat]; + D.f[dirBS ] = &DD[dirBS *size_Mat]; + D.f[dirBN ] = &DD[dirBN *size_Mat]; + D.f[dirTS ] = &DD[dirTS *size_Mat]; + D.f[dirZERO] = &DD[dirZERO*size_Mat]; + D.f[dirTNE ] = &DD[dirTNE *size_Mat]; + D.f[dirTSW ] = &DD[dirTSW *size_Mat]; + D.f[dirTSE ] = &DD[dirTSE *size_Mat]; + D.f[dirTNW ] = &DD[dirTNW *size_Mat]; + D.f[dirBNE ] = &DD[dirBNE *size_Mat]; + D.f[dirBSW ] = &DD[dirBSW *size_Mat]; + D.f[dirBSE ] = &DD[dirBSE *size_Mat]; + D.f[dirBNW ] = &DD[dirBNW *size_Mat]; + } + else + { + D.f[dirW ] = &DD[dirE *size_Mat]; + D.f[dirE ] = &DD[dirW 
*size_Mat]; + D.f[dirS ] = &DD[dirN *size_Mat]; + D.f[dirN ] = &DD[dirS *size_Mat]; + D.f[dirB ] = &DD[dirT *size_Mat]; + D.f[dirT ] = &DD[dirB *size_Mat]; + D.f[dirSW ] = &DD[dirNE *size_Mat]; + D.f[dirNE ] = &DD[dirSW *size_Mat]; + D.f[dirNW ] = &DD[dirSE *size_Mat]; + D.f[dirSE ] = &DD[dirNW *size_Mat]; + D.f[dirBW ] = &DD[dirTE *size_Mat]; + D.f[dirTE ] = &DD[dirBW *size_Mat]; + D.f[dirTW ] = &DD[dirBE *size_Mat]; + D.f[dirBE ] = &DD[dirTW *size_Mat]; + D.f[dirBS ] = &DD[dirTN *size_Mat]; + D.f[dirTN ] = &DD[dirBS *size_Mat]; + D.f[dirTS ] = &DD[dirBN *size_Mat]; + D.f[dirBN ] = &DD[dirTS *size_Mat]; + D.f[dirZERO] = &DD[dirZERO*size_Mat]; + D.f[dirTNE ] = &DD[dirBSW *size_Mat]; + D.f[dirTSW ] = &DD[dirBNE *size_Mat]; + D.f[dirTSE ] = &DD[dirBNW *size_Mat]; + D.f[dirTNW ] = &DD[dirBSE *size_Mat]; + D.f[dirBNE ] = &DD[dirTSW *size_Mat]; + D.f[dirBSW ] = &DD[dirTNE *size_Mat]; + D.f[dirBSE ] = &DD[dirTNW *size_Mat]; + D.f[dirBNW ] = &DD[dirTSE *size_Mat]; + } + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + //Test + //(D.f[dirZERO])[k]=c1o10; + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + real om_turb = om1 / (c1o1 + c3o1*om1*max(c0o1, turbViscosity[k_Q[k]])); + + real fac = c1o1;//c99o100; + real VeloX = fac*vx1; + real VeloY = fac*vx2; + real VeloZ = fac*vx3; + bool x = false; + bool y = false; + bool z = false; + + q = q_dirE[k]; + if (q>=c0o1 && q<=c1o1) + { + VeloX = c0o1; + VeloY = fac*vx2; + VeloZ = fac*vx3; + x = true; + feq=c2o27* (drho/*+three*( vx1 )*/+c9o2*( vx1 )*( vx1 ) * (c1o1 + drho)-cu_sq); + (D.f[dirW])[kw]=(c1o1-q)/(c1o1+q)*(f_E-f_W+(f_E+f_W-c2o1*feq*om_turb)/(c1o1-om_turb))*c1o2+(q*(f_E+f_W)-c6o1*c2o27*( VeloX ))/(c1o1+q) - c2o27 * drho; + //feq=c2over27* (drho+three*( vx1 )+c9over2*( vx1 )*( vx1 )-cu_sq); + 
//(D.f[dirW])[kw]=(one-q)/(one+q)*(f_E-feq*om1)/(one-om1)+(q*(f_E+f_W)-six*c2over27*( VeloX ))/(one+q); + //(D.f[dirW])[kw]=zero; + } + + q = q_dirW[k]; + if (q>=c0o1 && q<=c1o1) + { + VeloX = c0o1; + VeloY = fac*vx2; + VeloZ = fac*vx3; + x = true; + feq=c2o27* (drho/*+three*(-vx1 )*/+c9o2*(-vx1 )*(-vx1 ) * (c1o1 + drho)-cu_sq); + (D.f[dirE])[ke]=(c1o1-q)/(c1o1+q)*(f_W-f_E+(f_W+f_E-c2o1*feq*om_turb)/(c1o1-om_turb))*c1o2+(q*(f_W+f_E)-c6o1*c2o27*(-VeloX ))/(c1o1+q) - c2o27 * drho; + //feq=c2over27* (drho+three*(-vx1 )+c9over2*(-vx1 )*(-vx1 )-cu_sq); + //(D.f[dirE])[ke]=(one-q)/(one+q)*(f_W-feq*om_turb)/(one-om_turb)+(q*(f_W+f_E)-six*c2over27*(-VeloX ))/(one+q); + //(D.f[dirE])[ke]=zero; + } + + q = q_dirN[k]; + if (q>=c0o1 && q<=c1o1) + { + VeloX = fac*vx1; + VeloY = c0o1; + VeloZ = fac*vx3; + y = true; + feq=c2o27* (drho/*+three*( vx2 )*/+c9o2*( vx2 )*( vx2 ) * (c1o1 + drho)-cu_sq); + (D.f[dirS])[ks]=(c1o1-q)/(c1o1+q)*(f_N-f_S+(f_N+f_S-c2o1*feq*om_turb)/(c1o1-om_turb))*c1o2+(q*(f_N+f_S)-c6o1*c2o27*( VeloY ))/(c1o1+q) - c2o27 * drho; + //feq=c2over27* (drho+three*( vx2 )+c9over2*( vx2 )*( vx2 )-cu_sq); + //(D.f[dirS])[ks]=(one-q)/(one+q)*(f_N-feq*om_turb)/(one-om_turb)+(q*(f_N+f_S)-six*c2over27*( VeloY ))/(one+q); + //(D.f[dirS])[ks]=zero; + } + + q = q_dirS[k]; + if (q>=c0o1 && q<=c1o1) + { + VeloX = fac*vx1; + VeloY = c0o1; + VeloZ = fac*vx3; + y = true; + feq=c2o27* (drho/*+three*( -vx2 )*/+c9o2*( -vx2 )*( -vx2 ) * (c1o1 + drho)-cu_sq); + (D.f[dirN])[kn]=(c1o1-q)/(c1o1+q)*(f_S-f_N+(f_S+f_N-c2o1*feq*om_turb)/(c1o1-om_turb))*c1o2+(q*(f_S+f_N)-c6o1*c2o27*(-VeloY ))/(c1o1+q) - c2o27 * drho; + //feq=c2over27* (drho+three*( -vx2 )+c9over2*( -vx2 )*( -vx2 )-cu_sq); + //(D.f[dirN])[kn]=(one-q)/(one+q)*(f_S-feq*om_turb)/(one-om_turb)+(q*(f_S+f_N)-six*c2over27*(-VeloY ))/(one+q); + //(D.f[dirN])[kn]=zero; + } + + q = q_dirT[k]; + if (q>=c0o1 && q<=c1o1) + { + VeloX = fac*vx1; + VeloY = fac*vx2; + VeloZ = c0o1; + z = true; + feq=c2o27* (drho/*+three*( vx3)*/+c9o2*( vx3)*( 
vx3) * (c1o1 + drho)-cu_sq); + (D.f[dirB])[kb]=(c1o1-q)/(c1o1+q)*(f_T-f_B+(f_T+f_B-c2o1*feq*om_turb)/(c1o1-om_turb))*c1o2+(q*(f_T+f_B)-c6o1*c2o27*( VeloZ ))/(c1o1+q) - c2o27 * drho; + //feq=c2over27* (drho+three*( vx3)+c9over2*( vx3)*( vx3)-cu_sq); + //(D.f[dirB])[kb]=(one-q)/(one+q)*(f_T-feq*om_turb)/(one-om_turb)+(q*(f_T+f_B)-six*c2over27*( VeloZ ))/(one+q); + //(D.f[dirB])[kb]=one; + } + + q = q_dirB[k]; + if (q>=c0o1 && q<=c1o1) + { + VeloX = fac*vx1; + VeloY = fac*vx2; + VeloZ = c0o1; + z = true; + feq=c2o27* (drho/*+three*( -vx3)*/+c9o2*( -vx3)*( -vx3) * (c1o1 + drho)-cu_sq); + (D.f[dirT])[kt]=(c1o1-q)/(c1o1+q)*(f_B-f_T+(f_B+f_T-c2o1*feq*om_turb)/(c1o1-om_turb))*c1o2+(q*(f_B+f_T)-c6o1*c2o27*(-VeloZ ))/(c1o1+q) - c2o27 * drho; + //feq=c2over27* (drho+three*( -vx3)+c9over2*( -vx3)*( -vx3)-cu_sq); + //(D.f[dirT])[kt]=(one-q)/(one+q)*(f_B-feq*om_turb)/(one-om_turb)+(q*(f_B+f_T)-six*c2over27*(-VeloZ ))/(one+q); + //(D.f[dirT])[kt]=zero; + } + + q = q_dirNE[k]; + if (q>=c0o1 && q<=c1o1) + { + VeloX = fac*vx1; + VeloY = fac*vx2; + VeloZ = fac*vx3; + if (x == true) VeloX = c0o1; + if (y == true) VeloY = c0o1; + feq=c1o54* (drho/*+three*( vx1+vx2 )*/+c9o2*( vx1+vx2 )*( vx1+vx2 ) * (c1o1 + drho)-cu_sq); + (D.f[dirSW])[ksw]=(c1o1-q)/(c1o1+q)*(f_NE-f_SW+(f_NE+f_SW-c2o1*feq*om_turb)/(c1o1-om_turb))*c1o2+(q*(f_NE+f_SW)-c6o1*c1o54*(VeloX+VeloY))/(c1o1+q) - c1o54 * drho; + //feq=c1over54* (drho+three*( vx1+vx2 )+c9over2*( vx1+vx2 )*( vx1+vx2 )-cu_sq); + //(D.f[dirSW])[ksw]=(one-q)/(one+q)*(f_NE-feq*om_turb)/(one-om_turb)+(q*(f_NE+f_SW)-six*c1over54*(VeloX+VeloY))/(one+q); + //(D.f[dirSW])[ksw]=zero; + } + + q = q_dirSW[k]; + if (q>=c0o1 && q<=c1o1) + { + VeloX = fac*vx1; + VeloY = fac*vx2; + VeloZ = fac*vx3; + if (x == true) VeloX = c0o1; + if (y == true) VeloY = c0o1; + feq=c1o54* (drho/*+three*(-vx1-vx2 )*/+c9o2*(-vx1-vx2 )*(-vx1-vx2 ) * (c1o1 + drho)-cu_sq); + 
(D.f[dirNE])[kne]=(c1o1-q)/(c1o1+q)*(f_SW-f_NE+(f_SW+f_NE-c2o1*feq*om_turb)/(c1o1-om_turb))*c1o2+(q*(f_SW+f_NE)-c6o1*c1o54*(-VeloX-VeloY))/(c1o1+q) - c1o54 * drho; + //feq=c1over54* (drho+three*(-vx1-vx2 )+c9over2*(-vx1-vx2 )*(-vx1-vx2 )-cu_sq); + //(D.f[dirNE])[kne]=(one-q)/(one+q)*(f_SW-feq*om_turb)/(one-om_turb)+(q*(f_SW+f_NE)-six*c1over54*(-VeloX-VeloY))/(one+q); + //(D.f[dirNE])[kne]=zero; + } + + q = q_dirSE[k]; + if (q>=c0o1 && q<=c1o1) + { + VeloX = fac*vx1; + VeloY = fac*vx2; + VeloZ = fac*vx3; + if (x == true) VeloX = c0o1; + if (y == true) VeloY = c0o1; + feq=c1o54* (drho/*+three*( vx1-vx2 )*/+c9o2*( vx1-vx2 )*( vx1-vx2 ) * (c1o1 + drho)-cu_sq); + (D.f[dirNW])[knw]=(c1o1-q)/(c1o1+q)*(f_SE-f_NW+(f_SE+f_NW-c2o1*feq*om_turb)/(c1o1-om_turb))*c1o2+(q*(f_SE+f_NW)-c6o1*c1o54*( VeloX-VeloY))/(c1o1+q) - c1o54 * drho; + //feq=c1over54* (drho+three*( vx1-vx2 )+c9over2*( vx1-vx2 )*( vx1-vx2 )-cu_sq); + //(D.f[dirNW])[knw]=(one-q)/(one+q)*(f_SE-feq*om_turb)/(one-om_turb)+(q*(f_SE+f_NW)-six*c1over54*( VeloX-VeloY))/(one+q); + //(D.f[dirNW])[knw]=zero; + } + + q = q_dirNW[k]; + if (q>=c0o1 && q<=c1o1) + { + VeloX = fac*vx1; + VeloY = fac*vx2; + VeloZ = fac*vx3; + if (x == true) VeloX = c0o1; + if (y == true) VeloY = c0o1; + feq=c1o54* (drho/*+three*(-vx1+vx2 )*/+c9o2*(-vx1+vx2 )*(-vx1+vx2 ) * (c1o1 + drho)-cu_sq); + (D.f[dirSE])[kse]=(c1o1-q)/(c1o1+q)*(f_NW-f_SE+(f_NW+f_SE-c2o1*feq*om_turb)/(c1o1-om_turb))*c1o2+(q*(f_NW+f_SE)-c6o1*c1o54*(-VeloX+VeloY))/(c1o1+q) - c1o54 * drho; + //feq=c1over54* (drho+three*(-vx1+vx2 )+c9over2*(-vx1+vx2 )*(-vx1+vx2 )-cu_sq); + //(D.f[dirSE])[kse]=(one-q)/(one+q)*(f_NW-feq*om_turb)/(one-om_turb)+(q*(f_NW+f_SE)-six*c1over54*(-VeloX+VeloY))/(one+q); + //(D.f[dirSE])[kse]=zero; + } + + q = q_dirTE[k]; + if (q>=c0o1 && q<=c1o1) + { + VeloX = fac*vx1; + VeloY = fac*vx2; + VeloZ = fac*vx3; + if (x == true) VeloX = c0o1; + if (z == true) VeloZ = c0o1; + // if (k==10000) printf("AFTER x: %u \t y: %u \t z: %u \n VeloX: %f \t VeloY: %f \t VeloZ: 
%f \n\n", x,y,z, VeloX,VeloY,VeloZ); + feq=c1o54* (drho/*+three*( vx1 +vx3)*/+c9o2*( vx1 +vx3)*( vx1 +vx3) * (c1o1 + drho)-cu_sq); + (D.f[dirBW])[kbw]=(c1o1-q)/(c1o1+q)*(f_TE-f_BW+(f_TE+f_BW-c2o1*feq*om_turb)/(c1o1-om_turb))*c1o2+(q*(f_TE+f_BW)-c6o1*c1o54*( VeloX+VeloZ))/(c1o1+q) - c1o54 * drho; + //feq=c1over54* (drho+three*( vx1 +vx3)+c9over2*( vx1 +vx3)*( vx1 +vx3)-cu_sq); + //(D.f[dirBW])[kbw]=(one-q)/(one+q)*(f_TE-feq*om_turb)/(one-om_turb)+(q*(f_TE+f_BW)-six*c1over54*( VeloX+VeloZ))/(one+q); + //(D.f[dirBW])[kbw]=zero; + } + + q = q_dirBW[k]; + if (q>=c0o1 && q<=c1o1) + { + VeloX = fac*vx1; + VeloY = fac*vx2; + VeloZ = fac*vx3; + if (x == true) VeloX = c0o1; + if (z == true) VeloZ = c0o1; + feq=c1o54* (drho/*+three*(-vx1 -vx3)*/+c9o2*(-vx1 -vx3)*(-vx1 -vx3) * (c1o1 + drho)-cu_sq); + (D.f[dirTE])[kte]=(c1o1-q)/(c1o1+q)*(f_BW-f_TE+(f_BW+f_TE-c2o1*feq*om_turb)/(c1o1-om_turb))*c1o2+(q*(f_BW+f_TE)-c6o1*c1o54*(-VeloX-VeloZ))/(c1o1+q) - c1o54 * drho; + //feq=c1over54* (drho+three*(-vx1 -vx3)+c9over2*(-vx1 -vx3)*(-vx1 -vx3)-cu_sq); + //(D.f[dirTE])[kte]=(one-q)/(one+q)*(f_BW-feq*om_turb)/(one-om_turb)+(q*(f_BW+f_TE)-six*c1over54*(-VeloX-VeloZ))/(one+q); + //(D.f[dirTE])[kte]=zero; + } + + q = q_dirBE[k]; + if (q>=c0o1 && q<=c1o1) + { + VeloX = fac*vx1; + VeloY = fac*vx2; + VeloZ = fac*vx3; + if (x == true) VeloX = c0o1; + if (z == true) VeloZ = c0o1; + feq=c1o54* (drho/*+three*( vx1 -vx3)*/+c9o2*( vx1 -vx3)*( vx1 -vx3) * (c1o1 + drho)-cu_sq); + (D.f[dirTW])[ktw]=(c1o1-q)/(c1o1+q)*(f_BE-f_TW+(f_BE+f_TW-c2o1*feq*om_turb)/(c1o1-om_turb))*c1o2+(q*(f_BE+f_TW)-c6o1*c1o54*( VeloX-VeloZ))/(c1o1+q) - c1o54 * drho; + //feq=c1over54* (drho+three*( vx1 -vx3)+c9over2*( vx1 -vx3)*( vx1 -vx3)-cu_sq); + //(D.f[dirTW])[ktw]=(one-q)/(one+q)*(f_BE-feq*om_turb)/(one-om_turb)+(q*(f_BE+f_TW)-six*c1over54*( VeloX-VeloZ))/(one+q); + //(D.f[dirTW])[ktw]=zero; + } + + q = q_dirTW[k]; + if (q>=c0o1 && q<=c1o1) + { + VeloX = fac*vx1; + VeloY = fac*vx2; + VeloZ = fac*vx3; + if (x == true) VeloX 
= c0o1; + if (z == true) VeloZ = c0o1; + feq=c1o54* (drho/*+three*(-vx1 +vx3)*/+c9o2*(-vx1 +vx3)*(-vx1 +vx3) * (c1o1 + drho)-cu_sq); + (D.f[dirBE])[kbe]=(c1o1-q)/(c1o1+q)*(f_TW-f_BE+(f_TW+f_BE-c2o1*feq*om_turb)/(c1o1-om_turb))*c1o2+(q*(f_TW+f_BE)-c6o1*c1o54*(-VeloX+VeloZ))/(c1o1+q) - c1o54 * drho; + //feq=c1over54* (drho+three*(-vx1 +vx3)+c9over2*(-vx1 +vx3)*(-vx1 +vx3)-cu_sq); + //(D.f[dirBE])[kbe]=(one-q)/(one+q)*(f_TW-feq*om_turb)/(one-om_turb)+(q*(f_TW+f_BE)-six*c1over54*(-VeloX+VeloZ))/(one+q); + //(D.f[dirBE])[kbe]=zero; + } + + q = q_dirTN[k]; + if (q>=c0o1 && q<=c1o1) + { + VeloX = fac*vx1; + VeloY = fac*vx2; + VeloZ = fac*vx3; + if (y == true) VeloY = c0o1; + if (z == true) VeloZ = c0o1; + feq=c1o54* (drho/*+three*( vx2+vx3)*/+c9o2*( vx2+vx3)*( vx2+vx3) * (c1o1 + drho)-cu_sq); + (D.f[dirBS])[kbs]=(c1o1-q)/(c1o1+q)*(f_TN-f_BS+(f_TN+f_BS-c2o1*feq*om_turb)/(c1o1-om_turb))*c1o2+(q*(f_TN+f_BS)-c6o1*c1o54*( VeloY+VeloZ))/(c1o1+q) - c1o54 * drho; + //feq=c1over54* (drho+three*( vx2+vx3)+c9over2*( vx2+vx3)*( vx2+vx3)-cu_sq); + //(D.f[dirBS])[kbs]=(one-q)/(one+q)*(f_TN-feq*om_turb)/(one-om_turb)+(q*(f_TN+f_BS)-six*c1over54*( VeloY+VeloZ))/(one+q); + //(D.f[dirBS])[kbs]=zero; + } + + q = q_dirBS[k]; + if (q>=c0o1 && q<=c1o1) + { + VeloX = fac*vx1; + VeloY = fac*vx2; + VeloZ = fac*vx3; + if (y == true) VeloY = c0o1; + if (z == true) VeloZ = c0o1; + feq=c1o54* (drho/*+three*( -vx2-vx3)*/+c9o2*( -vx2-vx3)*( -vx2-vx3) * (c1o1 + drho)-cu_sq); + (D.f[dirTN])[ktn]=(c1o1-q)/(c1o1+q)*(f_BS-f_TN+(f_BS+f_TN-c2o1*feq*om_turb)/(c1o1-om_turb))*c1o2+(q*(f_BS+f_TN)-c6o1*c1o54*( -VeloY-VeloZ))/(c1o1+q) - c1o54 * drho; + //feq=c1over54* (drho+three*( -vx2-vx3)+c9over2*( -vx2-vx3)*( -vx2-vx3)-cu_sq); + //(D.f[dirTN])[ktn]=(one-q)/(one+q)*(f_BS-feq*om_turb)/(one-om_turb)+(q*(f_BS+f_TN)-six*c1over54*( -VeloY-VeloZ))/(one+q); + //(D.f[dirTN])[ktn]=zero; + } + + q = q_dirBN[k]; + if (q>=c0o1 && q<=c1o1) + { + VeloX = fac*vx1; + VeloY = fac*vx2; + VeloZ = fac*vx3; + if (y == true) VeloY = 
c0o1; + if (z == true) VeloZ = c0o1; + feq=c1o54* (drho/*+three*( vx2-vx3)*/+c9o2*( vx2-vx3)*( vx2-vx3) * (c1o1 + drho)-cu_sq); + (D.f[dirTS])[kts]=(c1o1-q)/(c1o1+q)*(f_BN-f_TS+(f_BN+f_TS-c2o1*feq*om_turb)/(c1o1-om_turb))*c1o2+(q*(f_BN+f_TS)-c6o1*c1o54*( VeloY-VeloZ))/(c1o1+q) - c1o54 * drho; + //feq=c1over54* (drho+three*( vx2-vx3)+c9over2*( vx2-vx3)*( vx2-vx3)-cu_sq); + //(D.f[dirTS])[kts]=(one-q)/(one+q)*(f_BN-feq*om_turb)/(one-om_turb)+(q*(f_BN+f_TS)-six*c1over54*( VeloY-VeloZ))/(one+q); + //(D.f[dirTS])[kts]=zero; + } + + q = q_dirTS[k]; + if (q>=c0o1 && q<=c1o1) + { + VeloX = fac*vx1; + VeloY = fac*vx2; + VeloZ = fac*vx3; + if (y == true) VeloY = c0o1; + if (z == true) VeloZ = c0o1; + feq=c1o54* (drho/*+three*( -vx2+vx3)*/+c9o2*( -vx2+vx3)*( -vx2+vx3) * (c1o1 + drho)-cu_sq); + (D.f[dirBN])[kbn]=(c1o1-q)/(c1o1+q)*(f_TS-f_BN+(f_TS+f_BN-c2o1*feq*om_turb)/(c1o1-om_turb))*c1o2+(q*(f_TS+f_BN)-c6o1*c1o54*( -VeloY+VeloZ))/(c1o1+q) - c1o54 * drho; + //feq=c1over54* (drho+three*( -vx2+vx3)+c9over2*( -vx2+vx3)*( -vx2+vx3)-cu_sq); + //(D.f[dirBN])[kbn]=(one-q)/(one+q)*(f_TS-feq*om_turb)/(one-om_turb)+(q*(f_TS+f_BN)-six*c1over54*( -VeloY+VeloZ))/(one+q); + //(D.f[dirBN])[kbn]=zero; + } + + q = q_dirTNE[k]; + if (q>=c0o1 && q<=c1o1) + { + VeloX = fac*vx1; + VeloY = fac*vx2; + VeloZ = fac*vx3; + if (x == true) VeloX = c0o1; + if (y == true) VeloY = c0o1; + if (z == true) VeloZ = c0o1; + feq=c1o216*(drho/*+three*( vx1+vx2+vx3)*/+c9o2*( vx1+vx2+vx3)*( vx1+vx2+vx3) * (c1o1 + drho)-cu_sq); + (D.f[dirBSW])[kbsw]=(c1o1-q)/(c1o1+q)*(f_TNE-f_BSW+(f_TNE+f_BSW-c2o1*feq*om_turb)/(c1o1-om_turb))*c1o2+(q*(f_TNE+f_BSW)-c6o1*c1o216*( VeloX+VeloY+VeloZ))/(c1o1+q) - c1o216 * drho; + //feq=c1over216*(drho+three*( vx1+vx2+vx3)+c9over2*( vx1+vx2+vx3)*( vx1+vx2+vx3)-cu_sq); + //(D.f[dirBSW])[kbsw]=(one-q)/(one+q)*(f_TNE-feq*om_turb)/(one-om_turb)+(q*(f_TNE+f_BSW)-six*c1over216*( VeloX+VeloY+VeloZ))/(one+q); + //(D.f[dirBSW])[kbsw]=zero; + } + + q = q_dirBSW[k]; + if (q>=c0o1 && q<=c1o1) + { + 
VeloX = fac*vx1; + VeloY = fac*vx2; + VeloZ = fac*vx3; + if (x == true) VeloX = c0o1; + if (y == true) VeloY = c0o1; + if (z == true) VeloZ = c0o1; + feq=c1o216*(drho/*+three*(-vx1-vx2-vx3)*/+c9o2*(-vx1-vx2-vx3)*(-vx1-vx2-vx3) * (c1o1 + drho)-cu_sq); + (D.f[dirTNE])[ktne]=(c1o1-q)/(c1o1+q)*(f_BSW-f_TNE+(f_BSW+f_TNE-c2o1*feq*om_turb)/(c1o1-om_turb))*c1o2+(q*(f_BSW+f_TNE)-c6o1*c1o216*(-VeloX-VeloY-VeloZ))/(c1o1+q) - c1o216 * drho; + //feq=c1over216*(drho+three*(-vx1-vx2-vx3)+c9over2*(-vx1-vx2-vx3)*(-vx1-vx2-vx3)-cu_sq); + //(D.f[dirTNE])[ktne]=(one-q)/(one+q)*(f_BSW-feq*om_turb)/(one-om_turb)+(q*(f_BSW+f_TNE)-six*c1over216*(-VeloX-VeloY-VeloZ))/(one+q); + //(D.f[dirTNE])[ktne]=zero; + } + + q = q_dirBNE[k]; + if (q>=c0o1 && q<=c1o1) + { + VeloX = fac*vx1; + VeloY = fac*vx2; + VeloZ = fac*vx3; + if (x == true) VeloX = c0o1; + if (y == true) VeloY = c0o1; + if (z == true) VeloZ = c0o1; + feq=c1o216*(drho/*+three*( vx1+vx2-vx3)*/+c9o2*( vx1+vx2-vx3)*( vx1+vx2-vx3) * (c1o1 + drho)-cu_sq); + (D.f[dirTSW])[ktsw]=(c1o1-q)/(c1o1+q)*(f_BNE-f_TSW+(f_BNE+f_TSW-c2o1*feq*om_turb)/(c1o1-om_turb))*c1o2+(q*(f_BNE+f_TSW)-c6o1*c1o216*( VeloX+VeloY-VeloZ))/(c1o1+q) - c1o216 * drho; + //feq=c1over216*(drho+three*( vx1+vx2-vx3)+c9over2*( vx1+vx2-vx3)*( vx1+vx2-vx3)-cu_sq); + //(D.f[dirTSW])[ktsw]=(one-q)/(one+q)*(f_BNE-feq*om_turb)/(one-om_turb)+(q*(f_BNE+f_TSW)-six*c1over216*( VeloX+VeloY-VeloZ))/(one+q); + //(D.f[dirTSW])[ktsw]=zero; + } + + q = q_dirTSW[k]; + if (q>=c0o1 && q<=c1o1) + { + VeloX = fac*vx1; + VeloY = fac*vx2; + VeloZ = fac*vx3; + if (x == true) VeloX = c0o1; + if (y == true) VeloY = c0o1; + if (z == true) VeloZ = c0o1; + feq=c1o216*(drho/*+three*(-vx1-vx2+vx3)*/+c9o2*(-vx1-vx2+vx3)*(-vx1-vx2+vx3) * (c1o1 + drho)-cu_sq); + (D.f[dirBNE])[kbne]=(c1o1-q)/(c1o1+q)*(f_TSW-f_BNE+(f_TSW+f_BNE-c2o1*feq*om_turb)/(c1o1-om_turb))*c1o2+(q*(f_TSW+f_BNE)-c6o1*c1o216*(-VeloX-VeloY+VeloZ))/(c1o1+q) - c1o216 * drho; + 
//feq=c1over216*(drho+three*(-vx1-vx2+vx3)+c9over2*(-vx1-vx2+vx3)*(-vx1-vx2+vx3)-cu_sq); + //(D.f[dirBNE])[kbne]=(one-q)/(one+q)*(f_TSW-feq*om_turb)/(one-om_turb)+(q*(f_TSW+f_BNE)-six*c1over216*(-VeloX-VeloY+VeloZ))/(one+q); + //(D.f[dirBNE])[kbne]=zero; + } + + q = q_dirTSE[k]; + if (q>=c0o1 && q<=c1o1) + { + VeloX = fac*vx1; + VeloY = fac*vx2; + VeloZ = fac*vx3; + if (x == true) VeloX = c0o1; + if (y == true) VeloY = c0o1; + if (z == true) VeloZ = c0o1; + feq=c1o216*(drho/*+three*( vx1-vx2+vx3)*/+c9o2*( vx1-vx2+vx3)*( vx1-vx2+vx3) * (c1o1 + drho)-cu_sq); + (D.f[dirBNW])[kbnw]=(c1o1-q)/(c1o1+q)*(f_TSE-f_BNW+(f_TSE+f_BNW-c2o1*feq*om_turb)/(c1o1-om_turb))*c1o2+(q*(f_TSE+f_BNW)-c6o1*c1o216*( VeloX-VeloY+VeloZ))/(c1o1+q) - c1o216 * drho; + //feq=c1over216*(drho+three*( vx1-vx2+vx3)+c9over2*( vx1-vx2+vx3)*( vx1-vx2+vx3)-cu_sq); + //(D.f[dirBNW])[kbnw]=(one-q)/(one+q)*(f_TSE-feq*om_turb)/(one-om_turb)+(q*(f_TSE+f_BNW)-six*c1over216*( VeloX-VeloY+VeloZ))/(one+q); + //(D.f[dirBNW])[kbnw]=zero; + } + + q = q_dirBNW[k]; + if (q>=c0o1 && q<=c1o1) + { + VeloX = fac*vx1; + VeloY = fac*vx2; + VeloZ = fac*vx3; + if (x == true) VeloX = c0o1; + if (y == true) VeloY = c0o1; + if (z == true) VeloZ = c0o1; + feq=c1o216*(drho/*+three*(-vx1+vx2-vx3)*/+c9o2*(-vx1+vx2-vx3)*(-vx1+vx2-vx3) * (c1o1 + drho)-cu_sq); + (D.f[dirTSE])[ktse]=(c1o1-q)/(c1o1+q)*(f_BNW-f_TSE+(f_BNW+f_TSE-c2o1*feq*om_turb)/(c1o1-om_turb))*c1o2+(q*(f_BNW+f_TSE)-c6o1*c1o216*(-VeloX+VeloY-VeloZ))/(c1o1+q) - c1o216 * drho; + //feq=c1over216*(drho+three*(-vx1+vx2-vx3)+c9over2*(-vx1+vx2-vx3)*(-vx1+vx2-vx3)-cu_sq); + //(D.f[dirTSE])[ktse]=(one-q)/(one+q)*(f_BNW-feq*om_turb)/(one-om_turb)+(q*(f_BNW+f_TSE)-six*c1over216*(-VeloX+VeloY-VeloZ))/(one+q); + //(D.f[dirTSE])[ktse]=zero; + } + + q = q_dirBSE[k]; + if (q>=c0o1 && q<=c1o1) + { + VeloX = fac*vx1; + VeloY = fac*vx2; + VeloZ = fac*vx3; + if (x == true) VeloX = c0o1; + if (y == true) VeloY = c0o1; + if (z == true) VeloZ = c0o1; + feq=c1o216*(drho/*+three*( 
vx1-vx2-vx3)*/+c9o2*( vx1-vx2-vx3)*( vx1-vx2-vx3) * (c1o1 + drho)-cu_sq); + (D.f[dirTNW])[ktnw]=(c1o1-q)/(c1o1+q)*(f_BSE-f_TNW+(f_BSE+f_TNW-c2o1*feq*om_turb)/(c1o1-om_turb))*c1o2+(q*(f_BSE+f_TNW)-c6o1*c1o216*( VeloX-VeloY-VeloZ))/(c1o1+q) - c1o216 * drho; + //feq=c1over216*(drho+three*( vx1-vx2-vx3)+c9over2*( vx1-vx2-vx3)*( vx1-vx2-vx3)-cu_sq); + //(D.f[dirTNW])[ktnw]=(one-q)/(one+q)*(f_BSE-feq*om_turb)/(one-om_turb)+(q*(f_BSE+f_TNW)-six*c1over216*( VeloX-VeloY-VeloZ))/(one+q); + //(D.f[dirTNW])[ktnw]=zero; + } + q = q_dirTNW[k]; + if (q>=c0o1 && q<=c1o1) + { + VeloX = fac*vx1; + VeloY = fac*vx2; + VeloZ = fac*vx3; + if (x == true) VeloX = c0o1; + if (y == true) VeloY = c0o1; + if (z == true) VeloZ = c0o1; + feq=c1o216*(drho/*+three*(-vx1+vx2+vx3)*/+c9o2*(-vx1+vx2+vx3)*(-vx1+vx2+vx3) * (c1o1 + drho)-cu_sq); + (D.f[dirBSE])[kbse]=(c1o1-q)/(c1o1+q)*(f_TNW-f_BSE+(f_TNW+f_BSE-c2o1*feq*om_turb)/(c1o1-om_turb))*c1o2+(q*(f_TNW+f_BSE)-c6o1*c1o216*(-VeloX+VeloY+VeloZ))/(c1o1+q) - c1o216 * drho; + //feq=c1over216*(drho+three*(-vx1+vx2+vx3)+c9over2*(-vx1+vx2+vx3)*(-vx1+vx2+vx3)-cu_sq); + //(D.f[dirBSE])[kbse]=(one-q)/(one+q)*(f_TNW-feq*om_turb)/(one-om_turb)+(q*(f_TNW+f_BSE)-six*c1over216*(-VeloX+VeloY+VeloZ))/(one+q); + //(D.f[dirBSE])[kbse]=zero; + } + } +} @@ -3109,48 +3777,3 @@ extern "C" __global__ void QSlipNormDeviceComp27(real* DD, } } //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - - -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - - diff --git a/src/gpu/VirtualFluids_GPU/GPU/StressBCs27.cu b/src/gpu/VirtualFluids_GPU/GPU/StressBCs27.cu new 
file mode 100644 index 0000000000000000000000000000000000000000..99efb964dc1bdb3b64ce799c9c9e9f2c2abcf866 --- /dev/null +++ b/src/gpu/VirtualFluids_GPU/GPU/StressBCs27.cu @@ -0,0 +1,1661 @@ +//======================================================================================= +// ____ ____ __ ______ __________ __ __ __ __ +// \ \ | | | | | _ \ |___ ___| | | | | / \ | | +// \ \ | | | | | |_) | | | | | | | / \ | | +// \ \ | | | | | _ / | | | | | | / /\ \ | | +// \ \ | | | | | | \ \ | | | \__/ | / ____ \ | |____ +// \ \ | | |__| |__| \__\ |__| \________/ /__/ \__\ |_______| +// \ \ | | ________________________________________________________________ +// \ \ | | | ______________________________________________________________| +// \ \| | | | __ __ __ __ ______ _______ +// \ | | |_____ | | | | | | | | | _ \ / _____) +// \ | | _____| | | | | | | | | | | \ \ \_______ +// \ | | | | |_____ | \_/ | | | | |_/ / _____ | +// \ _____| |__| |________| \_______/ |__| |______/ (_______/ +// +// This file is part of VirtualFluids. VirtualFluids is free software: you can +// redistribute it and/or modify it under the terms of the GNU General Public +// License as published by the Free Software Foundation, either version 3 of +// the License, or (at your option) any later version. +// +// VirtualFluids is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// for more details. +// +// You should have received a copy of the GNU General Public License along +// with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>. +// +//! \file StressBCs27.cu +//! \author Henrik Asmuth +//! \date 16/05/2022 +//! \brief Kernels for StressBC using the iMEM approach +//! +//! Both kernels prescribe a wall shear stress using the iMEM approach (see Asmuth et al. (2021), https://doi.org/10.1063/5.0065701) +//! 
QStressDeviceComp27 couples the iMEM to the single-node interpolated bounce-back.
+//! BBStressDevice27 couples the iMEM to a simple bounce-back.
+//! Note, that the iMEM function is currently only implemented for straight walls with z-normal and q=0.5.
+//! Other wall models could be implemented in the iMEM by replacing the formulations from Monin-Obukhov similarity theory (MOST)
+//! with other formulations, e.g., for smooth walls.
+//! iMEM so far most extensively tested with BBStressDevice27, but QStressDeviceComp27 also seems to be stable and working.
+//=======================================================================================
+
+#include "LBM/LB.h"
+#include "LBM/D3Q27.h"
+#include <lbm/constants/NumericConstants.h>
+
+using namespace vf::lbm::constant;
+
+//////////////////////////////////////////////////////////////////////////////
+//! \brief Wall model (iMEM): computes the wall velocity that imposes the modelled wall shear stress at boundary node k.
+//!
+//! Steps, in the order they are executed below:
+//!  1. Exponentially filter (weight eps for the new sample) the velocity sampled at the exchange location and the
+//!     instantaneous velocity at the wall-adjacent node; the filtered values are written back at index k.
+//!  2. Remove the wall-normal component from the filtered and the instantaneous velocities.
+//!  3. Compute the friction velocity from the logarithmic law, u_star = |v_el|*kappa/log(z/z0[k]),
+//!     with z = samplingOffset[k] + 0.5 and kappa = 0.4, and optionally store it in u_star_monitor[k].
+//!  4. Distribute tau_w/rho = u_star^2 onto the tangential directions (instantaneous wall-adjacent velocity divided by
+//!     the magnitude of its filtered counterpart) and subtract the tangential part of wallMomentum divided by rho.
+//!  5. Convert the result into a wall velocity, -3*F*forceFactor, and clip every component to
+//!     +-2 times the corresponding filtered tangential velocity component at the exchange location.
+//!
+//! All numeric literals are of type real (c1o1, c1o2, c2o1, c3o1), so no arithmetic is silently promoted to
+//! double precision when real is a single-precision type.
+//!
+//! \param k                    index of the boundary node in all per-boundary-node arrays (normals, filtered velocities, samplingOffset, z0, u_star_monitor)
+//! \param kN                   index into vx/vy/vz of the node at which the exchange-location velocity is sampled
+//! \param _wallNormalX,_wallNormalY,_wallNormalZ  wall normal components, read at k (assumed to be normalised, the projection does not normalise them - TODO confirm)
+//! \param vx,vy,vz             velocity field, read at kN
+//! \param vx_el,vy_el,vz_el    in/out: mean (temporally filtered) velocities at exchange location, updated at k
+//! \param vx_w_mean,vy_w_mean,vz_w_mean  in/out: mean (temporally filtered) velocities at wall-adjacent node, updated at k
+//! \param vx_w_inst,vy_w_inst,vz_w_inst  instantaneous velocities at wall-adjacent node
+//! \param rho                  density by which the tangential wall momentum is divided
+//! \param samplingOffset       per-node offset of the exchange location; the wall distance used is samplingOffset[k] + 0.5
+//! \param q                    currently unused: the wall distance is hard-coded for q=0.5 (see computation of z)
+//! \param forceFactor          e.g., 1.0 for simple-bounce back, or (1+q) for interpolated single-node bounce-back as in Geier et al (2015)
+//! \param eps                  filter constant in temporal averaging (weight of the newest sample)
+//! \param z0                   aerodynamic roughness length, read at k
+//! \param hasWallModelMonitor  if true, u_star is written to u_star_monitor[k]
+//! \param u_star_monitor       output array for the friction velocity, only written if hasWallModelMonitor is true
+//! \param wallMomentumX,wallMomentumY,wallMomentumZ  momentum already exchanged with the wall; only its wall-tangential part is used
+//! \param wallVelocityX,wallVelocityY,wallVelocityZ  out: resulting (clipped) wall velocity
+extern "C" __host__ __device__ __forceinline__ void iMEM(uint k, uint kN,
+                                                         real* _wallNormalX, real* _wallNormalY, real* _wallNormalZ,
+                                                         real* vx, real* vy, real* vz,
+                                                         real* vx_el, real* vy_el, real* vz_el,
+                                                         real* vx_w_mean, real* vy_w_mean, real* vz_w_mean,
+                                                         real vx_w_inst, real vy_w_inst, real vz_w_inst,
+                                                         real rho,
+                                                         int* samplingOffset,
+                                                         real q,
+                                                         real forceFactor,
+                                                         real eps,
+                                                         real* z0,
+                                                         bool hasWallModelMonitor,
+                                                         real* u_star_monitor,
+                                                         real wallMomentumX, real wallMomentumY, real wallMomentumZ,
+                                                         real& wallVelocityX, real& wallVelocityY, real&wallVelocityZ)
+{
+    const real wallNormalX = _wallNormalX[k];
+    const real wallNormalY = _wallNormalY[k];
+    const real wallNormalZ = _wallNormalZ[k];
+
+    //Sample velocity at exchange location and filter temporally
+    real _vx_el = eps*vx[kN]+(c1o1-eps)*vx_el[k];
+    real _vy_el = eps*vy[kN]+(c1o1-eps)*vy_el[k];
+    real _vz_el = eps*vz[kN]+(c1o1-eps)*vz_el[k];
+    vx_el[k] = _vx_el;
+    vy_el[k] = _vy_el;
+    vz_el[k] = _vz_el;
+
+    //filter velocity at wall-adjacent node
+    real _vx_w_mean = eps*vx_w_inst+(c1o1-eps)*vx_w_mean[k];
+    real _vy_w_mean = eps*vy_w_inst+(c1o1-eps)*vy_w_mean[k];
+    real _vz_w_mean = eps*vz_w_inst+(c1o1-eps)*vz_w_mean[k];
+    vx_w_mean[k] = _vx_w_mean;
+    vy_w_mean[k] = _vy_w_mean;
+    vz_w_mean[k] = _vz_w_mean;
+
+    //Subtract wall-normal velocity components (the stored filtered values above keep their normal component)
+    const real vDotN_el = _vx_el*wallNormalX + _vy_el*wallNormalY + _vz_el*wallNormalZ;
+    _vx_el -= vDotN_el*wallNormalX;
+    _vy_el -= vDotN_el*wallNormalY;
+    _vz_el -= vDotN_el*wallNormalZ;
+    const real vMag_el = sqrt( _vx_el*_vx_el + _vy_el*_vy_el + _vz_el*_vz_el );
+
+    const real vDotN_w_mean = _vx_w_mean*wallNormalX + _vy_w_mean*wallNormalY + _vz_w_mean*wallNormalZ;
+    _vx_w_mean -= vDotN_w_mean*wallNormalX;
+    _vy_w_mean -= vDotN_w_mean*wallNormalY;
+    _vz_w_mean -= vDotN_w_mean*wallNormalZ;
+    const real vMag_w_mean = sqrt( _vx_w_mean*_vx_w_mean + _vy_w_mean*_vy_w_mean + _vz_w_mean*_vz_w_mean );
+
+    const real vDotN_w = vx_w_inst*wallNormalX + vy_w_inst*wallNormalY + vz_w_inst*wallNormalZ;
+    const real _vx_w = vx_w_inst-vDotN_w*wallNormalX;
+    const real _vy_w = vy_w_inst-vDotN_w*wallNormalY;
+    const real _vz_w = vz_w_inst-vDotN_w*wallNormalZ;
+
+    //Compute wall shear stress tau_w via MOST
+    const real z = (real)samplingOffset[k] + c1o2; //assuming q=0.5, could be replaced by wall distance via wall normal
+    const real kappa = static_cast<real>(0.4);
+    const real u_star = vMag_el*kappa/(log(z/z0[k]));
+    if(hasWallModelMonitor) u_star_monitor[k] = u_star;
+    const real tau_w = u_star*u_star; //Note: this is actually tau_w/rho
+    const real A = c1o1; //wall area (obviously 1 for grid aligned walls, can come from grid builder later for complex geometries)
+
+    //Scale wall shear stress with near wall velocity, i.e., Schumann-Grötzbach (SG) approach
+    //NOTE(review): vMag_w_mean is zero when the filtered wall-adjacent velocity has no tangential component
+    //(e.g. fluid initialised at rest), which makes these quotients 0/0; confirm that the clipping below is meant to absorb that case.
+    const real F_w_x = (tau_w*A) * (_vx_w/vMag_w_mean);//(_vx_el/vMag_el)
+    const real F_w_y = (tau_w*A) * (_vy_w/vMag_w_mean);//(_vy_el/vMag_el)
+    const real F_w_z = (tau_w*A) * (_vz_w/vMag_w_mean);//(_vz_el/vMag_el)
+    //                                                   ^^^^^^^^^^^^--- old alternative: do not scale SG-like but only set direction via velocity at exchange location
+
+    //Momentum to be applied via wall velocity
+    const real wallMomDotN = wallMomentumX*wallNormalX+wallMomentumY*wallNormalY+wallMomentumZ*wallNormalZ;
+    const real F_x = F_w_x - ( wallMomentumX - wallMomDotN*wallNormalX )/rho;
+    const real F_y = F_w_y - ( wallMomentumY - wallMomDotN*wallNormalY )/rho;
+    const real F_z = F_w_z - ( wallMomentumZ - wallMomDotN*wallNormalZ )/rho;
+
+    //Compute wall velocity and clip (clipping only necessary for initial boundary layer development)
+    const real clipWallVelo = c2o1;
+    const real clipVx = clipWallVelo*_vx_el;
+    const real clipVy = clipWallVelo*_vy_el;
+    const real clipVz = clipWallVelo*_vz_el;
+
+    //unclipped wall velocity
+    const real targetVx = -c3o1*F_x*forceFactor;
+    const real targetVy = -c3o1*F_y*forceFactor;
+    const real targetVz = -c3o1*F_z*forceFactor;
+
+    //clamp to [-|clipV|, |clipV|]; the two branches handle positive and negative clipV
+    wallVelocityX = clipVx > -clipVx? min(clipVx, max(-clipVx, targetVx)): max(clipVx, min(-clipVx, targetVx));
+    wallVelocityY = clipVy > -clipVy? min(clipVy, max(-clipVy, targetVy)): max(clipVy, min(-clipVy, targetVy));
+    wallVelocityZ = clipVz > -clipVz? min(clipVz, max(-clipVz, targetVz)): max(clipVz, min(-clipVz, targetVz));
+}
+
+//////////////////////////////////////////////////////////////////////////////
+extern "C" __global__ void QStressDeviceComp27(real* DD,
+                                               int* k_Q,
+                                               int* k_N,
+                                               real* QQ,
+                                               unsigned int sizeQ,
+                                               real om1,
+                                               real* turbViscosity,
+                                               real* vx,
+                                               real* vy,
+                                               real* vz,
+                                               real* normalX,
+                                               real* normalY,
+                                               real* normalZ,
+                                               real* vx_el,
+                                               real* vy_el,
+                                               real* vz_el,
+                                               real* vx_w_mean,
+                                               real* vy_w_mean,
+                                               real* vz_w_mean,
+                                               int* samplingOffset,
+                                               real* z0,
+                                               bool hasWallModelMonitor,
+                                               real* u_star_monitor,
+                                               real* Fx_monitor,
+                                               real* Fy_monitor,
+                                               real* Fz_monitor,
+                                               unsigned int* neighborX,
+                                               unsigned int* neighborY,
+                                               unsigned int* neighborZ,
+                                               unsigned int size_Mat,
+                                               bool evenOrOdd)
+{
+
+    bool printOut = false;
+
+    Distributions27 D;
+    if (evenOrOdd==true)//get right array of post coll f's
+    {
+        D.f[dirE ] = &DD[dirE *size_Mat];
+        D.f[dirW ] = &DD[dirW *size_Mat];
+        D.f[dirN ] = &DD[dirN *size_Mat];
+        D.f[dirS ] = &DD[dirS *size_Mat];
+        D.f[dirT ] = &DD[dirT *size_Mat];
+        D.f[dirB ] = &DD[dirB *size_Mat];
+        D.f[dirNE ] = &DD[dirNE *size_Mat];
+        D.f[dirSW ] = &DD[dirSW *size_Mat];
+        D.f[dirSE ] = &DD[dirSE *size_Mat];
+        D.f[dirNW ] = &DD[dirNW *size_Mat];
+        D.f[dirTE ] = &DD[dirTE *size_Mat];
+        D.f[dirBW ] = &DD[dirBW *size_Mat];
+        D.f[dirBE ] = &DD[dirBE *size_Mat];
+        D.f[dirTW ] = &DD[dirTW *size_Mat];
+        D.f[dirTN ] = &DD[dirTN *size_Mat];
+        D.f[dirBS ] = &DD[dirBS *size_Mat];
+        D.f[dirBN ] = &DD[dirBN *size_Mat];
+        D.f[dirTS ] = &DD[dirTS *size_Mat];
+        D.f[dirZERO] = &DD[dirZERO*size_Mat];
+        D.f[dirTNE ] = &DD[dirTNE *size_Mat];
+        D.f[dirTSW ] = &DD[dirTSW *size_Mat];
+        D.f[dirTSE ] = &DD[dirTSE *size_Mat];
+        D.f[dirTNW ] = &DD[dirTNW *size_Mat];
+        D.f[dirBNE ] = &DD[dirBNE *size_Mat];
+        D.f[dirBSW ] = &DD[dirBSW *size_Mat];
+        D.f[dirBSE ] = &DD[dirBSE *size_Mat];
+        D.f[dirBNW ] = &DD[dirBNW *size_Mat];
+    }
+    else
+    {
+        D.f[dirW ] = &DD[dirE *size_Mat];
+
D.f[dirE ] = &DD[dirW *size_Mat]; + D.f[dirS ] = &DD[dirN *size_Mat]; + D.f[dirN ] = &DD[dirS *size_Mat]; + D.f[dirB ] = &DD[dirT *size_Mat]; + D.f[dirT ] = &DD[dirB *size_Mat]; + D.f[dirSW ] = &DD[dirNE *size_Mat]; + D.f[dirNE ] = &DD[dirSW *size_Mat]; + D.f[dirNW ] = &DD[dirSE *size_Mat]; + D.f[dirSE ] = &DD[dirNW *size_Mat]; + D.f[dirBW ] = &DD[dirTE *size_Mat]; + D.f[dirTE ] = &DD[dirBW *size_Mat]; + D.f[dirTW ] = &DD[dirBE *size_Mat]; + D.f[dirBE ] = &DD[dirTW *size_Mat]; + D.f[dirBS ] = &DD[dirTN *size_Mat]; + D.f[dirTN ] = &DD[dirBS *size_Mat]; + D.f[dirTS ] = &DD[dirBN *size_Mat]; + D.f[dirBN ] = &DD[dirTS *size_Mat]; + D.f[dirZERO] = &DD[dirZERO*size_Mat]; + D.f[dirTNE ] = &DD[dirBSW *size_Mat]; + D.f[dirTSW ] = &DD[dirBNE *size_Mat]; + D.f[dirTSE ] = &DD[dirBNW *size_Mat]; + D.f[dirTNW ] = &DD[dirBSE *size_Mat]; + D.f[dirBNE ] = &DD[dirTSW *size_Mat]; + D.f[dirBSW ] = &DD[dirTNE *size_Mat]; + D.f[dirBSE ] = &DD[dirTNW *size_Mat]; + D.f[dirBNW ] = &DD[dirTSE *size_Mat]; + } + //////////////////////////////////////////////////////////////////////////////// + const unsigned x = threadIdx.x; // Globaler x-Index + const unsigned y = blockIdx.x; // Globaler y-Index + const unsigned z = blockIdx.y; // Globaler z-Index + + const unsigned nx = blockDim.x; + const unsigned ny = gridDim.x; + + const unsigned k = nx*(ny*z + y) + x; + ////////////////////////////////////////////////////////////////////////// + + if(k<sizeQ/*kQ*/) + { + //////////////////////////////////////////////////////////////////////////////// + real *q_dirE, *q_dirW, *q_dirN, *q_dirS, *q_dirT, *q_dirB, + *q_dirNE, *q_dirSW, *q_dirSE, *q_dirNW, *q_dirTE, *q_dirBW, + *q_dirBE, *q_dirTW, *q_dirTN, *q_dirBS, *q_dirBN, *q_dirTS, + *q_dirTNE, *q_dirTSW, *q_dirTSE, *q_dirTNW, *q_dirBNE, *q_dirBSW, + *q_dirBSE, *q_dirBNW; + q_dirE = &QQ[dirE *sizeQ]; + q_dirW = &QQ[dirW *sizeQ]; + q_dirN = &QQ[dirN *sizeQ]; + q_dirS = &QQ[dirS *sizeQ]; + q_dirT = &QQ[dirT *sizeQ]; + q_dirB = &QQ[dirB *sizeQ]; + q_dirNE 
= &QQ[dirNE *sizeQ]; + q_dirSW = &QQ[dirSW *sizeQ]; + q_dirSE = &QQ[dirSE *sizeQ]; + q_dirNW = &QQ[dirNW *sizeQ]; + q_dirTE = &QQ[dirTE *sizeQ]; + q_dirBW = &QQ[dirBW *sizeQ]; + q_dirBE = &QQ[dirBE *sizeQ]; + q_dirTW = &QQ[dirTW *sizeQ]; + q_dirTN = &QQ[dirTN *sizeQ]; + q_dirBS = &QQ[dirBS *sizeQ]; + q_dirBN = &QQ[dirBN *sizeQ]; + q_dirTS = &QQ[dirTS *sizeQ]; + q_dirTNE = &QQ[dirTNE *sizeQ]; + q_dirTSW = &QQ[dirTSW *sizeQ]; + q_dirTSE = &QQ[dirTSE *sizeQ]; + q_dirTNW = &QQ[dirTNW *sizeQ]; + q_dirBNE = &QQ[dirBNE *sizeQ]; + q_dirBSW = &QQ[dirBSW *sizeQ]; + q_dirBSE = &QQ[dirBSE *sizeQ]; + q_dirBNW = &QQ[dirBNW *sizeQ]; + //////////////////////////////////////////////////////////////////////////////// + //index + unsigned int KQK = k_Q[k]; + unsigned int kzero= KQK; //get right adress of post-coll f's + unsigned int ke = KQK; + unsigned int kw = neighborX[KQK]; + unsigned int kn = KQK; + unsigned int ks = neighborY[KQK]; + unsigned int kt = KQK; + unsigned int kb = neighborZ[KQK]; + unsigned int ksw = neighborY[kw]; + unsigned int kne = KQK; + unsigned int kse = ks; + unsigned int knw = kw; + unsigned int kbw = neighborZ[kw]; + unsigned int kte = KQK; + unsigned int kbe = kb; + unsigned int ktw = kw; + unsigned int kbs = neighborZ[ks]; + unsigned int ktn = KQK; + unsigned int kbn = kb; + unsigned int kts = ks; + unsigned int ktse = ks; + unsigned int kbnw = kbw; + unsigned int ktnw = kw; + unsigned int kbse = kbs; + unsigned int ktsw = ksw; + unsigned int kbne = kb; + unsigned int ktne = KQK; + unsigned int kbsw = neighborZ[ksw]; + //////////////////////////////////////////////////////////////////////////////// + real f_E, f_W, f_N, f_S, f_T, f_B, f_NE, f_SW, f_SE, f_NW, f_TE, f_BW, f_BE, + f_TW, f_TN, f_BS, f_BN, f_TS, f_TNE, f_TSW, f_TSE, f_TNW, f_BNE, f_BSW, f_BSE, f_BNW; + + f_W = (D.f[dirE ])[ke ]; //post-coll f's + f_E = (D.f[dirW ])[kw ]; + f_S = (D.f[dirN ])[kn ]; + f_N = (D.f[dirS ])[ks ]; + f_B = (D.f[dirT ])[kt ]; + f_T = (D.f[dirB ])[kb ]; + f_SW = 
(D.f[dirNE ])[kne ]; + f_NE = (D.f[dirSW ])[ksw ]; + f_NW = (D.f[dirSE ])[kse ]; + f_SE = (D.f[dirNW ])[knw ]; + f_BW = (D.f[dirTE ])[kte ]; + f_TE = (D.f[dirBW ])[kbw ]; + f_TW = (D.f[dirBE ])[kbe ]; + f_BE = (D.f[dirTW ])[ktw ]; + f_BS = (D.f[dirTN ])[ktn ]; + f_TN = (D.f[dirBS ])[kbs ]; + f_TS = (D.f[dirBN ])[kbn ]; + f_BN = (D.f[dirTS ])[kts ]; + f_BSW = (D.f[dirTNE ])[ktne ]; + f_BNE = (D.f[dirTSW ])[ktsw ]; + f_BNW = (D.f[dirTSE ])[ktse ]; + f_BSE = (D.f[dirTNW ])[ktnw ]; + f_TSW = (D.f[dirBNE ])[kbne ]; + f_TNE = (D.f[dirBSW ])[kbsw ]; + f_TNW = (D.f[dirBSE ])[kbse ]; + f_TSE = (D.f[dirBNW ])[kbnw ]; + + //////////////////////////////////////////////////////////////////////////////// + real vx1, vx2, vx3, drho, feq, q; + drho = f_TSE + f_TNW + f_TNE + f_TSW + f_BSE + f_BNW + f_BNE + f_BSW + + f_BN + f_TS + f_TN + f_BS + f_BE + f_TW + f_TE + f_BW + f_SE + f_NW + f_NE + f_SW + + f_T + f_B + f_N + f_S + f_E + f_W + ((D.f[dirZERO])[kzero]); + + vx1 = (((f_TSE - f_BNW) - (f_TNW - f_BSE)) + ((f_TNE - f_BSW) - (f_TSW - f_BNE)) + + ((f_BE - f_TW) + (f_TE - f_BW)) + ((f_SE - f_NW) + (f_NE - f_SW)) + + (f_E - f_W)) / (c1o1 + drho); + + + vx2 = ((-(f_TSE - f_BNW) + (f_TNW - f_BSE)) + ((f_TNE - f_BSW) - (f_TSW - f_BNE)) + + ((f_BN - f_TS) + (f_TN - f_BS)) + (-(f_SE - f_NW) + (f_NE - f_SW)) + + (f_N - f_S)) / (c1o1 + drho); + + vx3 = (((f_TSE - f_BNW) + (f_TNW - f_BSE)) + ((f_TNE - f_BSW) + (f_TSW - f_BNE)) + + (-(f_BN - f_TS) + (f_TN - f_BS)) + ((f_TE - f_BW) - (f_BE - f_TW)) + + (f_T - f_B)) / (c1o1 + drho); + + real cu_sq=c3o2*(vx1*vx1+vx2*vx2+vx3*vx3) * (c1o1 + drho); + + real om_turb = om1 / (c1o1 + c3o1*om1*max(c0o1, turbViscosity[k_Q[k]])); + ////////////////////////////////////////////////////////////////////////// + if (evenOrOdd==false) //get adress where incoming f's should be written to + { + D.f[dirE ] = &DD[dirE *size_Mat]; + D.f[dirW ] = &DD[dirW *size_Mat]; + D.f[dirN ] = &DD[dirN *size_Mat]; + D.f[dirS ] = &DD[dirS *size_Mat]; + D.f[dirT ] = &DD[dirT 
*size_Mat]; + D.f[dirB ] = &DD[dirB *size_Mat]; + D.f[dirNE ] = &DD[dirNE *size_Mat]; + D.f[dirSW ] = &DD[dirSW *size_Mat]; + D.f[dirSE ] = &DD[dirSE *size_Mat]; + D.f[dirNW ] = &DD[dirNW *size_Mat]; + D.f[dirTE ] = &DD[dirTE *size_Mat]; + D.f[dirBW ] = &DD[dirBW *size_Mat]; + D.f[dirBE ] = &DD[dirBE *size_Mat]; + D.f[dirTW ] = &DD[dirTW *size_Mat]; + D.f[dirTN ] = &DD[dirTN *size_Mat]; + D.f[dirBS ] = &DD[dirBS *size_Mat]; + D.f[dirBN ] = &DD[dirBN *size_Mat]; + D.f[dirTS ] = &DD[dirTS *size_Mat]; + D.f[dirZERO] = &DD[dirZERO*size_Mat]; + D.f[dirTNE ] = &DD[dirTNE *size_Mat]; + D.f[dirTSW ] = &DD[dirTSW *size_Mat]; + D.f[dirTSE ] = &DD[dirTSE *size_Mat]; + D.f[dirTNW ] = &DD[dirTNW *size_Mat]; + D.f[dirBNE ] = &DD[dirBNE *size_Mat]; + D.f[dirBSW ] = &DD[dirBSW *size_Mat]; + D.f[dirBSE ] = &DD[dirBSE *size_Mat]; + D.f[dirBNW ] = &DD[dirBNW *size_Mat]; + } + else + { + D.f[dirW ] = &DD[dirE *size_Mat]; + D.f[dirE ] = &DD[dirW *size_Mat]; + D.f[dirS ] = &DD[dirN *size_Mat]; + D.f[dirN ] = &DD[dirS *size_Mat]; + D.f[dirB ] = &DD[dirT *size_Mat]; + D.f[dirT ] = &DD[dirB *size_Mat]; + D.f[dirSW ] = &DD[dirNE *size_Mat]; + D.f[dirNE ] = &DD[dirSW *size_Mat]; + D.f[dirNW ] = &DD[dirSE *size_Mat]; + D.f[dirSE ] = &DD[dirNW *size_Mat]; + D.f[dirBW ] = &DD[dirTE *size_Mat]; + D.f[dirTE ] = &DD[dirBW *size_Mat]; + D.f[dirTW ] = &DD[dirBE *size_Mat]; + D.f[dirBE ] = &DD[dirTW *size_Mat]; + D.f[dirBS ] = &DD[dirTN *size_Mat]; + D.f[dirTN ] = &DD[dirBS *size_Mat]; + D.f[dirTS ] = &DD[dirBN *size_Mat]; + D.f[dirBN ] = &DD[dirTS *size_Mat]; + D.f[dirZERO] = &DD[dirZERO*size_Mat]; + D.f[dirTNE ] = &DD[dirBSW *size_Mat]; + D.f[dirTSW ] = &DD[dirBNE *size_Mat]; + D.f[dirTSE ] = &DD[dirBNW *size_Mat]; + D.f[dirTNW ] = &DD[dirBSE *size_Mat]; + D.f[dirBNE ] = &DD[dirTSW *size_Mat]; + D.f[dirBSW ] = &DD[dirTNE *size_Mat]; + D.f[dirBSE ] = &DD[dirTNW *size_Mat]; + D.f[dirBNW ] = &DD[dirTSE *size_Mat]; + } + 
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + //Compute incoming f's with zero wall velocity + /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + + // incoming f's from bounce back + real f_E_in = 0.0, f_W_in = 0.0, f_N_in = 0.0, f_S_in = 0.0, f_T_in = 0.0, f_B_in = 0.0, f_NE_in = 0.0, f_SW_in = 0.0, f_SE_in = 0.0, f_NW_in = 0.0, f_TE_in = 0.0, f_BW_in = 0.0, f_BE_in = 0.0, f_TW_in = 0.0, f_TN_in = 0.0, f_BS_in = 0.0, f_BN_in = 0.0, f_TS_in = 0.0, f_TNE_in = 0.0, f_TSW_in = 0.0, f_TSE_in = 0.0, f_TNW_in = 0.0, f_BNE_in = 0.0, f_BSW_in = 0.0, f_BSE_in = 0.0, f_BNW_in = 0.0; + // momentum exchanged with wall at rest + real wallMomentumX = 0.0, wallMomentumY = 0.0, wallMomentumZ = 0.0; + + q = q_dirE[k]; + if (q>=c0o1 && q<=c1o1) + { + feq=c2o27* (drho/*+three*( vx1 )*/+c9o2*( vx1 )*( vx1 ) * (c1o1 + drho)-cu_sq); + f_W_in=(c1o1-q)/(c1o1+q)*(f_E-f_W+(f_E+f_W-c2o1*feq*om_turb)/(c1o1-om_turb))*c1o2+(q*(f_E+f_W))/(c1o1+q) - c2o27 * drho; + wallMomentumX += f_E+f_W_in; + } + + q = q_dirW[k]; + if (q>=c0o1 && q<=c1o1) + { + feq=c2o27* (drho/*+three*(-vx1 )*/+c9o2*(-vx1 )*(-vx1 ) * (c1o1 + drho)-cu_sq); + f_E_in=(c1o1-q)/(c1o1+q)*(f_W-f_E+(f_W+f_E-c2o1*feq*om_turb)/(c1o1-om_turb))*c1o2+(q*(f_W+f_E))/(c1o1+q) - c2o27 * drho; + wallMomentumX -= f_W+f_E_in; + } + + q = q_dirN[k]; + if (q>=c0o1 && q<=c1o1) + { + feq=c2o27* (drho/*+three*( vx2 )*/+c9o2*( vx2 )*( vx2 ) * (c1o1 + drho)-cu_sq); + f_S_in=(c1o1-q)/(c1o1+q)*(f_N-f_S+(f_N+f_S-c2o1*feq*om_turb)/(c1o1-om_turb))*c1o2+(q*(f_N+f_S))/(c1o1+q) - c2o27 * drho; + wallMomentumY += f_N+f_S_in; + } + + q = q_dirS[k]; + if (q>=c0o1 && q<=c1o1) + { + feq=c2o27* (drho/*+three*( -vx2 )*/+c9o2*( -vx2 )*( -vx2 ) * (c1o1 + drho)-cu_sq); + 
f_N_in=(c1o1-q)/(c1o1+q)*(f_S-f_N+(f_S+f_N-c2o1*feq*om_turb)/(c1o1-om_turb))*c1o2+(q*(f_S+f_N))/(c1o1+q) - c2o27 * drho; + wallMomentumY -= f_S+f_N_in; + } + + q = q_dirT[k]; + if (q>=c0o1 && q<=c1o1) + { + feq=c2o27* (drho/*+three*( vx3)*/+c9o2*( vx3)*( vx3) * (c1o1 + drho)-cu_sq); + f_B_in=(c1o1-q)/(c1o1+q)*(f_T-f_B+(f_T+f_B-c2o1*feq*om_turb)/(c1o1-om_turb))*c1o2+(q*(f_T+f_B))/(c1o1+q) - c2o27 * drho; + wallMomentumZ += f_T+f_B_in; + } + + q = q_dirB[k]; + if (q>=c0o1 && q<=c1o1) + { + feq=c2o27* (drho/*+three*( -vx3)*/+c9o2*( -vx3)*( -vx3) * (c1o1 + drho)-cu_sq); + f_T_in=(c1o1-q)/(c1o1+q)*(f_B-f_T+(f_B+f_T-c2o1*feq*om_turb)/(c1o1-om_turb))*c1o2+(q*(f_B+f_T))/(c1o1+q) - c2o27 * drho; + wallMomentumZ -= f_B+f_T_in; + } + + q = q_dirNE[k]; + if (q>=c0o1 && q<=c1o1) + { + feq=c1o54* (drho/*+three*( vx1+vx2 )*/+c9o2*( vx1+vx2 )*( vx1+vx2 ) * (c1o1 + drho)-cu_sq); + f_SW_in=(c1o1-q)/(c1o1+q)*(f_NE-f_SW+(f_NE+f_SW-c2o1*feq*om_turb)/(c1o1-om_turb))*c1o2+(q*(f_NE+f_SW))/(c1o1+q) - c1o54 * drho; + wallMomentumX += f_NE+f_SW_in; + wallMomentumY += f_NE+f_SW_in; + } + + q = q_dirSW[k]; + if (q>=c0o1 && q<=c1o1) + { + feq=c1o54* (drho/*+three*(-vx1-vx2 )*/+c9o2*(-vx1-vx2 )*(-vx1-vx2 ) * (c1o1 + drho)-cu_sq); + f_NE_in=(c1o1-q)/(c1o1+q)*(f_SW-f_NE+(f_SW+f_NE-c2o1*feq*om_turb)/(c1o1-om_turb))*c1o2+(q*(f_SW+f_NE))/(c1o1+q) - c1o54 * drho; + wallMomentumX -= f_SW+f_NE_in; + wallMomentumY -= f_SW+f_NE_in; + } + + q = q_dirSE[k]; + if (q>=c0o1 && q<=c1o1) + { + feq=c1o54* (drho/*+three*( vx1-vx2 )*/+c9o2*( vx1-vx2 )*( vx1-vx2 ) * (c1o1 + drho)-cu_sq); + f_NW_in=(c1o1-q)/(c1o1+q)*(f_SE-f_NW+(f_SE+f_NW-c2o1*feq*om_turb)/(c1o1-om_turb))*c1o2+(q*(f_SE+f_NW))/(c1o1+q) - c1o54 * drho; + wallMomentumX += f_SE+f_NW_in; + wallMomentumY -= f_SE+f_NW_in; + } + + q = q_dirNW[k]; + if (q>=c0o1 && q<=c1o1) + { + feq=c1o54* (drho/*+three*(-vx1+vx2 )*/+c9o2*(-vx1+vx2 )*(-vx1+vx2 ) * (c1o1 + drho)-cu_sq); + 
f_SE_in=(c1o1-q)/(c1o1+q)*(f_NW-f_SE+(f_NW+f_SE-c2o1*feq*om_turb)/(c1o1-om_turb))*c1o2+(q*(f_NW+f_SE))/(c1o1+q) - c1o54 * drho; + wallMomentumX -= f_NW+f_SE_in; + wallMomentumY += f_NW+f_SE_in; + } + + q = q_dirTE[k]; + if (q>=c0o1 && q<=c1o1) + { + feq=c1o54* (drho/*+three*( vx1 +vx3)*/+c9o2*( vx1 +vx3)*( vx1 +vx3) * (c1o1 + drho)-cu_sq); + f_BW_in=(c1o1-q)/(c1o1+q)*(f_TE-f_BW+(f_TE+f_BW-c2o1*feq*om_turb)/(c1o1-om_turb))*c1o2+(q*(f_TE+f_BW))/(c1o1+q) - c1o54 * drho; + wallMomentumX += f_TE+f_BW_in; + wallMomentumZ += f_TE+f_BW_in; + } + + q = q_dirBW[k]; + if (q>=c0o1 && q<=c1o1) + { + feq=c1o54* (drho/*+three*(-vx1 -vx3)*/+c9o2*(-vx1 -vx3)*(-vx1 -vx3) * (c1o1 + drho)-cu_sq); + f_TE_in=(c1o1-q)/(c1o1+q)*(f_BW-f_TE+(f_BW+f_TE-c2o1*feq*om_turb)/(c1o1-om_turb))*c1o2+(q*(f_BW+f_TE))/(c1o1+q) - c1o54 * drho; + wallMomentumX -= f_BW+f_TE_in; + wallMomentumZ -= f_BW+f_TE_in; + } + + q = q_dirBE[k]; + if (q>=c0o1 && q<=c1o1) + { + feq=c1o54* (drho/*+three*( vx1 -vx3)*/+c9o2*( vx1 -vx3)*( vx1 -vx3) * (c1o1 + drho)-cu_sq); + f_TW_in=(c1o1-q)/(c1o1+q)*(f_BE-f_TW+(f_BE+f_TW-c2o1*feq*om_turb)/(c1o1-om_turb))*c1o2+(q*(f_BE+f_TW))/(c1o1+q) - c1o54 * drho; + wallMomentumX += f_BE+f_TW_in; + wallMomentumZ -= f_BE+f_TW_in; + } + + q = q_dirTW[k]; + if (q>=c0o1 && q<=c1o1) + { + feq=c1o54* (drho/*+three*(-vx1 +vx3)*/+c9o2*(-vx1 +vx3)*(-vx1 +vx3) * (c1o1 + drho)-cu_sq); + f_BE_in=(c1o1-q)/(c1o1+q)*(f_TW-f_BE+(f_TW+f_BE-c2o1*feq*om_turb)/(c1o1-om_turb))*c1o2+(q*(f_TW+f_BE))/(c1o1+q) - c1o54 * drho; + wallMomentumX -= f_TW+f_BE_in; + wallMomentumZ += f_TW+f_BE_in; + } + + q = q_dirTN[k]; + if (q>=c0o1 && q<=c1o1) + { + feq=c1o54* (drho/*+three*( vx2+vx3)*/+c9o2*( vx2+vx3)*( vx2+vx3) * (c1o1 + drho)-cu_sq); + f_BS_in=(c1o1-q)/(c1o1+q)*(f_TN-f_BS+(f_TN+f_BS-c2o1*feq*om_turb)/(c1o1-om_turb))*c1o2+(q*(f_TN+f_BS))/(c1o1+q) - c1o54 * drho; + wallMomentumY += f_TN+f_BS_in; + wallMomentumZ += f_TN+f_BS_in; + } + + q = q_dirBS[k]; + if (q>=c0o1 && q<=c1o1) + { + feq=c1o54* (drho/*+three*( 
-vx2-vx3)*/+c9o2*( -vx2-vx3)*( -vx2-vx3) * (c1o1 + drho)-cu_sq); + f_TN_in=(c1o1-q)/(c1o1+q)*(f_BS-f_TN+(f_BS+f_TN-c2o1*feq*om_turb)/(c1o1-om_turb))*c1o2+(q*(f_BS+f_TN))/(c1o1+q) - c1o54 * drho; + wallMomentumY -= f_BS+f_TN_in; + wallMomentumZ -= f_BS+f_TN_in; + } + + q = q_dirBN[k]; + if (q>=c0o1 && q<=c1o1) + { + feq=c1o54* (drho/*+three*( vx2-vx3)*/+c9o2*( vx2-vx3)*( vx2-vx3) * (c1o1 + drho)-cu_sq); + f_TS_in=(c1o1-q)/(c1o1+q)*(f_BN-f_TS+(f_BN+f_TS-c2o1*feq*om_turb)/(c1o1-om_turb))*c1o2+(q*(f_BN+f_TS))/(c1o1+q) - c1o54 * drho; + wallMomentumY += f_BN+f_TS_in; + wallMomentumZ -= f_BN+f_TS_in; + } + + q = q_dirTS[k]; + if (q>=c0o1 && q<=c1o1) + { + feq=c1o54* (drho/*+three*( -vx2+vx3)*/+c9o2*( -vx2+vx3)*( -vx2+vx3) * (c1o1 + drho)-cu_sq); + f_BN_in=(c1o1-q)/(c1o1+q)*(f_TS-f_BN+(f_TS+f_BN-c2o1*feq*om_turb)/(c1o1-om_turb))*c1o2+(q*(f_TS+f_BN))/(c1o1+q) - c1o54 * drho; + wallMomentumY -= f_TS+f_BN_in; + wallMomentumZ += f_TS+f_BN_in; + } + + q = q_dirTNE[k]; + if (q>=c0o1 && q<=c1o1) + { + feq=c1o216*(drho/*+three*( vx1+vx2+vx3)*/+c9o2*( vx1+vx2+vx3)*( vx1+vx2+vx3) * (c1o1 + drho)-cu_sq); + f_BSW_in=(c1o1-q)/(c1o1+q)*(f_TNE-f_BSW+(f_TNE+f_BSW-c2o1*feq*om_turb)/(c1o1-om_turb))*c1o2+(q*(f_TNE+f_BSW))/(c1o1+q) - c1o216 * drho; + wallMomentumX += f_TNE+f_BSW_in; + wallMomentumY += f_TNE+f_BSW_in; + wallMomentumZ += f_TNE+f_BSW_in; + } + + q = q_dirBSW[k]; + if (q>=c0o1 && q<=c1o1) + { + feq=c1o216*(drho/*+three*(-vx1-vx2-vx3)*/+c9o2*(-vx1-vx2-vx3)*(-vx1-vx2-vx3) * (c1o1 + drho)-cu_sq); + f_TNE_in=(c1o1-q)/(c1o1+q)*(f_BSW-f_TNE+(f_BSW+f_TNE-c2o1*feq*om_turb)/(c1o1-om_turb))*c1o2+(q*(f_BSW+f_TNE))/(c1o1+q) - c1o216 * drho; + wallMomentumX -= f_BSW+f_TNE_in; + wallMomentumY -= f_BSW+f_TNE_in; + wallMomentumZ -= f_BSW+f_TNE_in; + } + + q = q_dirBNE[k]; + if (q>=c0o1 && q<=c1o1) + { + feq=c1o216*(drho/*+three*( vx1+vx2-vx3)*/+c9o2*( vx1+vx2-vx3)*( vx1+vx2-vx3) * (c1o1 + drho)-cu_sq); + 
f_TSW_in=(c1o1-q)/(c1o1+q)*(f_BNE-f_TSW+(f_BNE+f_TSW-c2o1*feq*om_turb)/(c1o1-om_turb))*c1o2+(q*(f_BNE+f_TSW))/(c1o1+q) - c1o216 * drho; + wallMomentumX += f_BNE+f_TSW_in; + wallMomentumY += f_BNE+f_TSW_in; + wallMomentumZ -= f_BNE+f_TSW_in; + } + + q = q_dirTSW[k]; + if (q>=c0o1 && q<=c1o1) + { + feq=c1o216*(drho/*+three*(-vx1-vx2+vx3)*/+c9o2*(-vx1-vx2+vx3)*(-vx1-vx2+vx3) * (c1o1 + drho)-cu_sq); + f_BNE_in=(c1o1-q)/(c1o1+q)*(f_TSW-f_BNE+(f_TSW+f_BNE-c2o1*feq*om_turb)/(c1o1-om_turb))*c1o2+(q*(f_TSW+f_BNE))/(c1o1+q) - c1o216 * drho; + wallMomentumX -= f_TSW+f_BNE_in; + wallMomentumY -= f_TSW+f_BNE_in; + wallMomentumZ += f_TSW+f_BNE_in; + } + + q = q_dirTSE[k]; + if (q>=c0o1 && q<=c1o1) + { + feq=c1o216*(drho/*+three*( vx1-vx2+vx3)*/+c9o2*( vx1-vx2+vx3)*( vx1-vx2+vx3) * (c1o1 + drho)-cu_sq); + f_BNW_in=(c1o1-q)/(c1o1+q)*(f_TSE-f_BNW+(f_TSE+f_BNW-c2o1*feq*om_turb)/(c1o1-om_turb))*c1o2+(q*(f_TSE+f_BNW))/(c1o1+q) - c1o216 * drho; + wallMomentumX += f_TSE+f_BNW_in; + wallMomentumY -= f_TSE+f_BNW_in; + wallMomentumZ += f_TSE+f_BNW_in; + } + + q = q_dirBNW[k]; + if (q>=c0o1 && q<=c1o1) + { + feq=c1o216*(drho/*+three*(-vx1+vx2-vx3)*/+c9o2*(-vx1+vx2-vx3)*(-vx1+vx2-vx3) * (c1o1 + drho)-cu_sq); + f_TSE_in=(c1o1-q)/(c1o1+q)*(f_BNW-f_TSE+(f_BNW+f_TSE-c2o1*feq*om_turb)/(c1o1-om_turb))*c1o2+(q*(f_BNW+f_TSE))/(c1o1+q) - c1o216 * drho; + wallMomentumX -= f_BNW+f_TSE_in; + wallMomentumY += f_BNW+f_TSE_in; + wallMomentumZ -= f_BNW+f_TSE_in; + } + + q = q_dirBSE[k]; + if (q>=c0o1 && q<=c1o1) + { + feq=c1o216*(drho/*+three*( vx1-vx2-vx3)*/+c9o2*( vx1-vx2-vx3)*( vx1-vx2-vx3) * (c1o1 + drho)-cu_sq); + f_TNW_in=(c1o1-q)/(c1o1+q)*(f_BSE-f_TNW+(f_BSE+f_TNW-c2o1*feq*om_turb)/(c1o1-om_turb))*c1o2+(q*(f_BSE+f_TNW))/(c1o1+q) - c1o216 * drho; + wallMomentumX += f_BSE+f_TNW_in; + wallMomentumY -= f_BSE+f_TNW_in; + wallMomentumZ -= f_BSE+f_TNW_in; + } + + q = q_dirTNW[k]; + if (q>=c0o1 && q<=c1o1) + { + feq=c1o216*(drho/*+three*(-vx1+vx2+vx3)*/+c9o2*(-vx1+vx2+vx3)*(-vx1+vx2+vx3) * (c1o1 + 
drho)-cu_sq); + f_BSE_in=(c1o1-q)/(c1o1+q)*(f_TNW-f_BSE+(f_TNW+f_BSE-c2o1*feq*om_turb)/(c1o1-om_turb))*c1o2+(q*(f_TNW+f_BSE))/(c1o1+q) - c1o216 * drho; + wallMomentumX -= f_TNW+f_BSE_in; + wallMomentumY += f_TNW+f_BSE_in; + wallMomentumZ += f_TNW+f_BSE_in; + } + + // //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + // //Compute wall velocity + // //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + real VeloX=0.0, VeloY=0.0, VeloZ=0.0; + + q = 0.5f; + real eps = 0.001f; + + iMEM( k, k_N[k], + normalX, normalY, normalZ, + vx, vy, vz, + vx_el, vy_el, vz_el, + vx_w_mean, vy_w_mean, vz_w_mean, + vx1, vx2, vx3, + c1o1+drho, + samplingOffset, + q, + 1.0+q, + eps, + z0, + hasWallModelMonitor, + u_star_monitor, + wallMomentumX, wallMomentumY, wallMomentumZ, + VeloX, VeloY, VeloZ); + + // //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + // //Add wall velocity and write f's + // //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + + q = q_dirE[k]; + if (q>=c0o1 && q<=c1o1) + { + (D.f[dirW])[kw] = f_W_in - (c6o1*c2o27*( VeloX ))/(c1o1+q); + wallMomentumX += -(c6o1*c2o27*( VeloX ))/(c1o1+q); + } + + q = q_dirW[k]; + if (q>=c0o1 && q<=c1o1) + { + (D.f[dirE])[ke] = f_E_in - (c6o1*c2o27*(-VeloX ))/(c1o1+q); + wallMomentumX -= - (c6o1*c2o27*(-VeloX ))/(c1o1+q); + } + + q = q_dirN[k]; + if (q>=c0o1 && q<=c1o1) + { + (D.f[dirS])[ks] = f_S_in - (c6o1*c2o27*( VeloY ))/(c1o1+q); + wallMomentumY += - (c6o1*c2o27*( VeloY ))/(c1o1+q); + } + + q = q_dirS[k]; + if (q>=c0o1 && q<=c1o1) + { + (D.f[dirN])[kn] = f_N_in - (c6o1*c2o27*(-VeloY ))/(c1o1+q); + wallMomentumY -= 
-(c6o1*c2o27*(-VeloY ))/(c1o1+q); + } + + q = q_dirT[k]; + if (q>=c0o1 && q<=c1o1) + { + (D.f[dirB])[kb] = f_B_in - (c6o1*c2o27*( VeloZ ))/(c1o1+q); + wallMomentumZ += - (c6o1*c2o27*( VeloZ ))/(c1o1+q); + } + + q = q_dirB[k]; + if (q>=c0o1 && q<=c1o1) + { + (D.f[dirT])[kt] = f_T_in - (c6o1*c2o27*(-VeloZ ))/(c1o1+q); + wallMomentumZ -= -(c6o1*c2o27*(-VeloZ ))/(c1o1+q); + } + + q = q_dirNE[k]; + if (q>=c0o1 && q<=c1o1) + { + (D.f[dirSW])[ksw] = f_SW_in - (c6o1*c1o54*(VeloX+VeloY))/(c1o1+q); + wallMomentumX += -(c6o1*c1o54*(VeloX+VeloY))/(c1o1+q); + wallMomentumY += -(c6o1*c1o54*(VeloX+VeloY))/(c1o1+q); + } + + q = q_dirSW[k]; + if (q>=c0o1 && q<=c1o1) + { + (D.f[dirNE])[kne] = f_NE_in - (c6o1*c1o54*(-VeloX-VeloY))/(c1o1+q); + wallMomentumX -= - (c6o1*c1o54*(-VeloX-VeloY))/(c1o1+q); + wallMomentumY -= - (c6o1*c1o54*(-VeloX-VeloY))/(c1o1+q); + } + + q = q_dirSE[k]; + if (q>=c0o1 && q<=c1o1) + { + (D.f[dirNW])[knw] = f_NW_in - (c6o1*c1o54*( VeloX-VeloY))/(c1o1+q); + wallMomentumX += -(c6o1*c1o54*( VeloX-VeloY))/(c1o1+q); + wallMomentumY -= -(c6o1*c1o54*( VeloX-VeloY))/(c1o1+q); + } + + q = q_dirNW[k]; + if (q>=c0o1 && q<=c1o1) + { + (D.f[dirSE])[kse] = f_SE_in - (c6o1*c1o54*(-VeloX+VeloY))/(c1o1+q); + wallMomentumX -= - (c6o1*c1o54*(-VeloX+VeloY))/(c1o1+q); + wallMomentumY += - (c6o1*c1o54*(-VeloX+VeloY))/(c1o1+q); + } + + q = q_dirTE[k]; + if (q>=c0o1 && q<=c1o1) + { + (D.f[dirBW])[kbw] = f_BW_in - (c6o1*c1o54*( VeloX+VeloZ))/(c1o1+q); + wallMomentumX += - (c6o1*c1o54*( VeloX+VeloZ))/(c1o1+q); + wallMomentumZ += - (c6o1*c1o54*( VeloX+VeloZ))/(c1o1+q); + } + + q = q_dirBW[k]; + if (q>=c0o1 && q<=c1o1) + { + (D.f[dirTE])[kte] = f_TE_in - (c6o1*c1o54*(-VeloX-VeloZ))/(c1o1+q); + wallMomentumX -= - (c6o1*c1o54*(-VeloX-VeloZ))/(c1o1+q); + wallMomentumZ -= - (c6o1*c1o54*(-VeloX-VeloZ))/(c1o1+q); + } + + q = q_dirBE[k]; + if (q>=c0o1 && q<=c1o1) + { + (D.f[dirTW])[ktw] = f_TW_in - (c6o1*c1o54*( VeloX-VeloZ))/(c1o1+q); + wallMomentumX += - (c6o1*c1o54*( VeloX-VeloZ))/(c1o1+q); 
+ wallMomentumZ -= - (c6o1*c1o54*( VeloX-VeloZ))/(c1o1+q); + } + + q = q_dirTW[k]; + if (q>=c0o1 && q<=c1o1) + { + (D.f[dirBE])[kbe] = f_BE_in - (c6o1*c1o54*(-VeloX+VeloZ))/(c1o1+q); + wallMomentumX -= - (c6o1*c1o54*(-VeloX+VeloZ))/(c1o1+q); + wallMomentumZ += - (c6o1*c1o54*(-VeloX+VeloZ))/(c1o1+q); + } + + q = q_dirTN[k]; + if (q>=c0o1 && q<=c1o1) + { + (D.f[dirBS])[kbs] = f_BS_in - (c6o1*c1o54*( VeloY+VeloZ))/(c1o1+q); + wallMomentumY += - (c6o1*c1o54*( VeloY+VeloZ))/(c1o1+q); + wallMomentumZ += - (c6o1*c1o54*( VeloY+VeloZ))/(c1o1+q); + } + + q = q_dirBS[k]; + if (q>=c0o1 && q<=c1o1) + { + (D.f[dirTN])[ktn] = f_TN_in - (c6o1*c1o54*( -VeloY-VeloZ))/(c1o1+q); + wallMomentumY -= - (c6o1*c1o54*( -VeloY-VeloZ))/(c1o1+q); + wallMomentumZ -= - (c6o1*c1o54*( -VeloY-VeloZ))/(c1o1+q); + } + + q = q_dirBN[k]; + if (q>=c0o1 && q<=c1o1) + { + (D.f[dirTS])[kts] = f_TS_in - (c6o1*c1o54*( VeloY-VeloZ))/(c1o1+q); + wallMomentumY += - (c6o1*c1o54*( VeloY-VeloZ))/(c1o1+q); + wallMomentumZ -= - (c6o1*c1o54*( VeloY-VeloZ))/(c1o1+q); + } + + q = q_dirTS[k]; + if (q>=c0o1 && q<=c1o1) + { + (D.f[dirBN])[kbn] = f_BN_in - (c6o1*c1o54*( -VeloY+VeloZ))/(c1o1+q); + wallMomentumY -= - (c6o1*c1o54*( -VeloY+VeloZ))/(c1o1+q); + wallMomentumZ += - (c6o1*c1o54*( -VeloY+VeloZ))/(c1o1+q); + } + + q = q_dirTNE[k]; + if (q>=c0o1 && q<=c1o1) + { + (D.f[dirBSW])[kbsw] = f_BSW_in - (c6o1*c1o216*( VeloX+VeloY+VeloZ))/(c1o1+q); + wallMomentumX += - (c6o1*c1o216*( VeloX+VeloY+VeloZ))/(c1o1+q); + wallMomentumY += - (c6o1*c1o216*( VeloX+VeloY+VeloZ))/(c1o1+q); + wallMomentumZ += - (c6o1*c1o216*( VeloX+VeloY+VeloZ))/(c1o1+q); + } + + q = q_dirBSW[k]; + if (q>=c0o1 && q<=c1o1) + { + (D.f[dirTNE])[ktne] = f_TNE_in - (c6o1*c1o216*(-VeloX-VeloY-VeloZ))/(c1o1+q); + wallMomentumX -= - (c6o1*c1o216*(-VeloX-VeloY-VeloZ))/(c1o1+q); + wallMomentumY -= - (c6o1*c1o216*(-VeloX-VeloY-VeloZ))/(c1o1+q); + wallMomentumZ -= - (c6o1*c1o216*(-VeloX-VeloY-VeloZ))/(c1o1+q); + } + + q = q_dirBNE[k]; + if (q>=c0o1 && q<=c1o1) + { + 
(D.f[dirTSW])[ktsw] = f_TSW_in - (c6o1*c1o216*( VeloX+VeloY-VeloZ))/(c1o1+q); + wallMomentumX += - (c6o1*c1o216*( VeloX+VeloY-VeloZ))/(c1o1+q); + wallMomentumY += - (c6o1*c1o216*( VeloX+VeloY-VeloZ))/(c1o1+q); + wallMomentumZ -= - (c6o1*c1o216*( VeloX+VeloY-VeloZ))/(c1o1+q); + } + + q = q_dirTSW[k]; + if (q>=c0o1 && q<=c1o1) + { + (D.f[dirBNE])[kbne] = f_BNE_in - (c6o1*c1o216*(-VeloX-VeloY+VeloZ))/(c1o1+q); + wallMomentumX -= - (c6o1*c1o216*(-VeloX-VeloY+VeloZ))/(c1o1+q); + wallMomentumY -= - (c6o1*c1o216*(-VeloX-VeloY+VeloZ))/(c1o1+q); + wallMomentumZ += - (c6o1*c1o216*(-VeloX-VeloY+VeloZ))/(c1o1+q); + } + + q = q_dirTSE[k]; + if (q>=c0o1 && q<=c1o1) + { + (D.f[dirBNW])[kbnw] = f_BNW_in - (c6o1*c1o216*( VeloX-VeloY+VeloZ))/(c1o1+q); + wallMomentumX += - (c6o1*c1o216*( VeloX-VeloY+VeloZ))/(c1o1+q); + wallMomentumY -= - (c6o1*c1o216*( VeloX-VeloY+VeloZ))/(c1o1+q); + wallMomentumZ += - (c6o1*c1o216*( VeloX-VeloY+VeloZ))/(c1o1+q); + } + + q = q_dirBNW[k]; + if (q>=c0o1 && q<=c1o1) + { + (D.f[dirTSE])[ktse] = f_TSE_in - (c6o1*c1o216*(-VeloX+VeloY-VeloZ))/(c1o1+q); + wallMomentumX -= - (c6o1*c1o216*(-VeloX+VeloY-VeloZ))/(c1o1+q); + wallMomentumY += - (c6o1*c1o216*(-VeloX+VeloY-VeloZ))/(c1o1+q); + wallMomentumZ -= - (c6o1*c1o216*(-VeloX+VeloY-VeloZ))/(c1o1+q); + } + + q = q_dirBSE[k]; + if (q>=c0o1 && q<=c1o1) + { + (D.f[dirTNW])[ktnw] = f_TNW_in - (c6o1*c1o216*( VeloX-VeloY-VeloZ))/(c1o1+q); + wallMomentumX += - (c6o1*c1o216*( VeloX-VeloY-VeloZ))/(c1o1+q); + wallMomentumY -= - (c6o1*c1o216*( VeloX-VeloY-VeloZ))/(c1o1+q); + wallMomentumZ -= - (c6o1*c1o216*( VeloX-VeloY-VeloZ))/(c1o1+q); + } + + q = q_dirTNW[k]; + if (q>=c0o1 && q<=c1o1) + { + (D.f[dirBSE])[kbse] = f_BSE_in - (c6o1*c1o216*(-VeloX+VeloY+VeloZ))/(c1o1+q); + wallMomentumX -= - (c6o1*c1o216*(-VeloX+VeloY+VeloZ))/(c1o1+q); + wallMomentumY += - (c6o1*c1o216*(-VeloX+VeloY+VeloZ))/(c1o1+q); + wallMomentumZ += - (c6o1*c1o216*(-VeloX+VeloY+VeloZ))/(c1o1+q); + } + + if(hasWallModelMonitor) + { + Fx_monitor[k] = 
wallMomentumX; + Fy_monitor[k] = wallMomentumY; + Fz_monitor[k] = wallMomentumZ; + } + + } +} + +////////////////////////////////////////////////////////////////////////////// +extern "C" __global__ void BBStressDevice27( real* DD, + int* k_Q, + int* k_N, + real* QQ, + unsigned int sizeQ, + real* vx, + real* vy, + real* vz, + real* normalX, + real* normalY, + real* normalZ, + real* vx_el, + real* vy_el, + real* vz_el, + real* vx_w_mean, + real* vy_w_mean, + real* vz_w_mean, + int* samplingOffset, + real* z0, + bool hasWallModelMonitor, + real* u_star_monitor, + real* Fx_monitor, + real* Fy_monitor, + real* Fz_monitor, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + unsigned int size_Mat, + bool evenOrOdd) +{ + Distributions27 D; + if (evenOrOdd==true) + { + D.f[dirE ] = &DD[dirE *size_Mat]; + D.f[dirW ] = &DD[dirW *size_Mat]; + D.f[dirN ] = &DD[dirN *size_Mat]; + D.f[dirS ] = &DD[dirS *size_Mat]; + D.f[dirT ] = &DD[dirT *size_Mat]; + D.f[dirB ] = &DD[dirB *size_Mat]; + D.f[dirNE ] = &DD[dirNE *size_Mat]; + D.f[dirSW ] = &DD[dirSW *size_Mat]; + D.f[dirSE ] = &DD[dirSE *size_Mat]; + D.f[dirNW ] = &DD[dirNW *size_Mat]; + D.f[dirTE ] = &DD[dirTE *size_Mat]; + D.f[dirBW ] = &DD[dirBW *size_Mat]; + D.f[dirBE ] = &DD[dirBE *size_Mat]; + D.f[dirTW ] = &DD[dirTW *size_Mat]; + D.f[dirTN ] = &DD[dirTN *size_Mat]; + D.f[dirBS ] = &DD[dirBS *size_Mat]; + D.f[dirBN ] = &DD[dirBN *size_Mat]; + D.f[dirTS ] = &DD[dirTS *size_Mat]; + D.f[dirZERO] = &DD[dirZERO*size_Mat]; + D.f[dirTNE ] = &DD[dirTNE *size_Mat]; + D.f[dirTSW ] = &DD[dirTSW *size_Mat]; + D.f[dirTSE ] = &DD[dirTSE *size_Mat]; + D.f[dirTNW ] = &DD[dirTNW *size_Mat]; + D.f[dirBNE ] = &DD[dirBNE *size_Mat]; + D.f[dirBSW ] = &DD[dirBSW *size_Mat]; + D.f[dirBSE ] = &DD[dirBSE *size_Mat]; + D.f[dirBNW ] = &DD[dirBNW *size_Mat]; + } + else + { + D.f[dirW ] = &DD[dirE *size_Mat]; + D.f[dirE ] = &DD[dirW *size_Mat]; + D.f[dirS ] = &DD[dirN *size_Mat]; + D.f[dirN ] = &DD[dirS *size_Mat]; + 
D.f[dirB ] = &DD[dirT *size_Mat]; + D.f[dirT ] = &DD[dirB *size_Mat]; + D.f[dirSW ] = &DD[dirNE *size_Mat]; + D.f[dirNE ] = &DD[dirSW *size_Mat]; + D.f[dirNW ] = &DD[dirSE *size_Mat]; + D.f[dirSE ] = &DD[dirNW *size_Mat]; + D.f[dirBW ] = &DD[dirTE *size_Mat]; + D.f[dirTE ] = &DD[dirBW *size_Mat]; + D.f[dirTW ] = &DD[dirBE *size_Mat]; + D.f[dirBE ] = &DD[dirTW *size_Mat]; + D.f[dirBS ] = &DD[dirTN *size_Mat]; + D.f[dirTN ] = &DD[dirBS *size_Mat]; + D.f[dirTS ] = &DD[dirBN *size_Mat]; + D.f[dirBN ] = &DD[dirTS *size_Mat]; + D.f[dirZERO] = &DD[dirZERO*size_Mat]; + D.f[dirTNE ] = &DD[dirBSW *size_Mat]; + D.f[dirTSW ] = &DD[dirBNE *size_Mat]; + D.f[dirTSE ] = &DD[dirBNW *size_Mat]; + D.f[dirTNW ] = &DD[dirBSE *size_Mat]; + D.f[dirBNE ] = &DD[dirTSW *size_Mat]; + D.f[dirBSW ] = &DD[dirTNE *size_Mat]; + D.f[dirBSE ] = &DD[dirTNW *size_Mat]; + D.f[dirBNW ] = &DD[dirTSE *size_Mat]; + } + //////////////////////////////////////////////////////////////////////////////// + const unsigned x = threadIdx.x; // Globaler x-Index + const unsigned y = blockIdx.x; // Globaler y-Index + const unsigned z = blockIdx.y; // Globaler z-Index + + const unsigned nx = blockDim.x; + const unsigned ny = gridDim.x; + + const unsigned k = nx*(ny*z + y) + x; + ////////////////////////////////////////////////////////////////////////// + + if(k<sizeQ) + { + //////////////////////////////////////////////////////////////////////////////// + real *q_dirE, *q_dirW, *q_dirN, *q_dirS, *q_dirT, *q_dirB, + *q_dirNE, *q_dirSW, *q_dirSE, *q_dirNW, *q_dirTE, *q_dirBW, + *q_dirBE, *q_dirTW, *q_dirTN, *q_dirBS, *q_dirBN, *q_dirTS, + *q_dirTNE, *q_dirTSW, *q_dirTSE, *q_dirTNW, *q_dirBNE, *q_dirBSW, + *q_dirBSE, *q_dirBNW; + q_dirE = &QQ[dirE *sizeQ]; + q_dirW = &QQ[dirW *sizeQ]; + q_dirN = &QQ[dirN *sizeQ]; + q_dirS = &QQ[dirS *sizeQ]; + q_dirT = &QQ[dirT *sizeQ]; + q_dirB = &QQ[dirB *sizeQ]; + q_dirNE = &QQ[dirNE *sizeQ]; + q_dirSW = &QQ[dirSW *sizeQ]; + q_dirSE = &QQ[dirSE *sizeQ]; + q_dirNW = &QQ[dirNW *sizeQ]; 
+ q_dirTE = &QQ[dirTE *sizeQ]; + q_dirBW = &QQ[dirBW *sizeQ]; + q_dirBE = &QQ[dirBE *sizeQ]; + q_dirTW = &QQ[dirTW *sizeQ]; + q_dirTN = &QQ[dirTN *sizeQ]; + q_dirBS = &QQ[dirBS *sizeQ]; + q_dirBN = &QQ[dirBN *sizeQ]; + q_dirTS = &QQ[dirTS *sizeQ]; + q_dirTNE = &QQ[dirTNE *sizeQ]; + q_dirTSW = &QQ[dirTSW *sizeQ]; + q_dirTSE = &QQ[dirTSE *sizeQ]; + q_dirTNW = &QQ[dirTNW *sizeQ]; + q_dirBNE = &QQ[dirBNE *sizeQ]; + q_dirBSW = &QQ[dirBSW *sizeQ]; + q_dirBSE = &QQ[dirBSE *sizeQ]; + q_dirBNW = &QQ[dirBNW *sizeQ]; + //////////////////////////////////////////////////////////////////////////////// + //index + unsigned int KQK = k_Q[k]; + unsigned int kzero= KQK; + unsigned int ke = KQK; + unsigned int kw = neighborX[KQK]; + unsigned int kn = KQK; + unsigned int ks = neighborY[KQK]; + unsigned int kt = KQK; + unsigned int kb = neighborZ[KQK]; + unsigned int ksw = neighborY[kw]; + unsigned int kne = KQK; + unsigned int kse = ks; + unsigned int knw = kw; + unsigned int kbw = neighborZ[kw]; + unsigned int kte = KQK; + unsigned int kbe = kb; + unsigned int ktw = kw; + unsigned int kbs = neighborZ[ks]; + unsigned int ktn = KQK; + unsigned int kbn = kb; + unsigned int kts = ks; + unsigned int ktse = ks; + unsigned int kbnw = kbw; + unsigned int ktnw = kw; + unsigned int kbse = kbs; + unsigned int ktsw = ksw; + unsigned int kbne = kb; + unsigned int ktne = KQK; + unsigned int kbsw = neighborZ[ksw]; + + //////////////////////////////////////////////////////////////////////////////// + real f_E, f_W, f_N, f_S, f_T, f_B, f_NE, f_SW, f_SE, f_NW, f_TE, f_BW, f_BE, + f_TW, f_TN, f_BS, f_BN, f_TS, f_TNE, f_TSW, f_TSE, f_TNW, f_BNE, f_BSW, f_BSE, f_BNW; + + f_W = (D.f[dirE ])[ke ]; + f_E = (D.f[dirW ])[kw ]; + f_S = (D.f[dirN ])[kn ]; + f_N = (D.f[dirS ])[ks ]; + f_B = (D.f[dirT ])[kt ]; + f_T = (D.f[dirB ])[kb ]; + f_SW = (D.f[dirNE ])[kne ]; + f_NE = (D.f[dirSW ])[ksw ]; + f_NW = (D.f[dirSE ])[kse ]; + f_SE = (D.f[dirNW ])[knw ]; + f_BW = (D.f[dirTE ])[kte ]; + f_TE = (D.f[dirBW ])[kbw ]; 
+ f_TW = (D.f[dirBE ])[kbe ]; + f_BE = (D.f[dirTW ])[ktw ]; + f_BS = (D.f[dirTN ])[ktn ]; + f_TN = (D.f[dirBS ])[kbs ]; + f_TS = (D.f[dirBN ])[kbn ]; + f_BN = (D.f[dirTS ])[kts ]; + f_BSW = (D.f[dirTNE ])[ktne ]; + f_BNE = (D.f[dirTSW ])[ktsw ]; + f_BNW = (D.f[dirTSE ])[ktse ]; + f_BSE = (D.f[dirTNW ])[ktnw ]; + f_TSW = (D.f[dirBNE ])[kbne ]; + f_TNE = (D.f[dirBSW ])[kbsw ]; + f_TNW = (D.f[dirBSE ])[kbse ]; + f_TSE = (D.f[dirBNW ])[kbnw ]; + + //////////////////////////////////////////////////////////////////////////////// + real vx1, vx2, vx3, drho; + drho = f_TSE + f_TNW + f_TNE + f_TSW + f_BSE + f_BNW + f_BNE + f_BSW + + f_BN + f_TS + f_TN + f_BS + f_BE + f_TW + f_TE + f_BW + f_SE + f_NW + f_NE + f_SW + + f_T + f_B + f_N + f_S + f_E + f_W + ((D.f[dirZERO])[kzero]); + + vx1 = (((f_TSE - f_BNW) - (f_TNW - f_BSE)) + ((f_TNE - f_BSW) - (f_TSW - f_BNE)) + + ((f_BE - f_TW) + (f_TE - f_BW)) + ((f_SE - f_NW) + (f_NE - f_SW)) + + (f_E - f_W)) / (c1o1 + drho); + + + vx2 = ((-(f_TSE - f_BNW) + (f_TNW - f_BSE)) + ((f_TNE - f_BSW) - (f_TSW - f_BNE)) + + ((f_BN - f_TS) + (f_TN - f_BS)) + (-(f_SE - f_NW) + (f_NE - f_SW)) + + (f_N - f_S)) / (c1o1 + drho); + + vx3 = (((f_TSE - f_BNW) + (f_TNW - f_BSE)) + ((f_TNE - f_BSW) + (f_TSW - f_BNE)) + + (-(f_BN - f_TS) + (f_TN - f_BS)) + ((f_TE - f_BW) - (f_BE - f_TW)) + + (f_T - f_B)) / (c1o1 + drho); + + ////////////////////////////////////////////////////////////////////////// + if (evenOrOdd==false) + { + D.f[dirE ] = &DD[dirE *size_Mat]; + D.f[dirW ] = &DD[dirW *size_Mat]; + D.f[dirN ] = &DD[dirN *size_Mat]; + D.f[dirS ] = &DD[dirS *size_Mat]; + D.f[dirT ] = &DD[dirT *size_Mat]; + D.f[dirB ] = &DD[dirB *size_Mat]; + D.f[dirNE ] = &DD[dirNE *size_Mat]; + D.f[dirSW ] = &DD[dirSW *size_Mat]; + D.f[dirSE ] = &DD[dirSE *size_Mat]; + D.f[dirNW ] = &DD[dirNW *size_Mat]; + D.f[dirTE ] = &DD[dirTE *size_Mat]; + D.f[dirBW ] = &DD[dirBW *size_Mat]; + D.f[dirBE ] = &DD[dirBE *size_Mat]; + D.f[dirTW ] = &DD[dirTW *size_Mat]; + D.f[dirTN ] = 
&DD[dirTN *size_Mat]; + D.f[dirBS ] = &DD[dirBS *size_Mat]; + D.f[dirBN ] = &DD[dirBN *size_Mat]; + D.f[dirTS ] = &DD[dirTS *size_Mat]; + D.f[dirZERO] = &DD[dirZERO*size_Mat]; + D.f[dirTNE ] = &DD[dirTNE *size_Mat]; + D.f[dirTSW ] = &DD[dirTSW *size_Mat]; + D.f[dirTSE ] = &DD[dirTSE *size_Mat]; + D.f[dirTNW ] = &DD[dirTNW *size_Mat]; + D.f[dirBNE ] = &DD[dirBNE *size_Mat]; + D.f[dirBSW ] = &DD[dirBSW *size_Mat]; + D.f[dirBSE ] = &DD[dirBSE *size_Mat]; + D.f[dirBNW ] = &DD[dirBNW *size_Mat]; + } + else + { + D.f[dirW ] = &DD[dirE *size_Mat]; + D.f[dirE ] = &DD[dirW *size_Mat]; + D.f[dirS ] = &DD[dirN *size_Mat]; + D.f[dirN ] = &DD[dirS *size_Mat]; + D.f[dirB ] = &DD[dirT *size_Mat]; + D.f[dirT ] = &DD[dirB *size_Mat]; + D.f[dirSW ] = &DD[dirNE *size_Mat]; + D.f[dirNE ] = &DD[dirSW *size_Mat]; + D.f[dirNW ] = &DD[dirSE *size_Mat]; + D.f[dirSE ] = &DD[dirNW *size_Mat]; + D.f[dirBW ] = &DD[dirTE *size_Mat]; + D.f[dirTE ] = &DD[dirBW *size_Mat]; + D.f[dirTW ] = &DD[dirBE *size_Mat]; + D.f[dirBE ] = &DD[dirTW *size_Mat]; + D.f[dirBS ] = &DD[dirTN *size_Mat]; + D.f[dirTN ] = &DD[dirBS *size_Mat]; + D.f[dirTS ] = &DD[dirBN *size_Mat]; + D.f[dirBN ] = &DD[dirTS *size_Mat]; + D.f[dirZERO] = &DD[dirZERO*size_Mat]; + D.f[dirTNE ] = &DD[dirBSW *size_Mat]; + D.f[dirTSW ] = &DD[dirBNE *size_Mat]; + D.f[dirTSE ] = &DD[dirBNW *size_Mat]; + D.f[dirTNW ] = &DD[dirBSE *size_Mat]; + D.f[dirBNE ] = &DD[dirTSW *size_Mat]; + D.f[dirBSW ] = &DD[dirTNE *size_Mat]; + D.f[dirBSE ] = &DD[dirTNW *size_Mat]; + D.f[dirBNW ] = &DD[dirTSE *size_Mat]; + } + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + real f_E_in, f_W_in, f_N_in, f_S_in, f_T_in, f_B_in, f_NE_in, f_SW_in, f_SE_in, f_NW_in, f_TE_in, f_BW_in, f_BE_in, + f_TW_in, f_TN_in, f_BS_in, f_BN_in, f_TS_in, f_TNE_in, f_TSW_in, f_TSE_in, f_TNW_in, f_BNE_in, f_BSW_in, f_BSE_in, f_BNW_in; + + // momentum exchanged with wall at rest + real 
wallMomentumX = 0.0, wallMomentumY = 0.0, wallMomentumZ = 0.0; + + real q; + q = q_dirE[k]; + if (q>=c0o1 && q<=c1o1) + { + f_W_in=f_E; + wallMomentumX += f_E+f_W_in; + } + + q = q_dirW[k]; + if (q>=c0o1 && q<=c1o1) + { + f_E_in=f_W; + wallMomentumX -= f_W+f_E_in; + } + + q = q_dirN[k]; + if (q>=c0o1 && q<=c1o1) + { + f_S_in=f_N; + wallMomentumY += f_N+f_S_in; + } + + q = q_dirS[k]; + if (q>=c0o1 && q<=c1o1) + { + f_N_in=f_S; + wallMomentumY -= f_S+f_N_in; + } + + q = q_dirT[k]; + if (q>=c0o1 && q<=c1o1) + { + f_B_in=f_T; + wallMomentumZ += f_T+f_B_in; + } + + q = q_dirB[k]; + if (q>=c0o1 && q<=c1o1) + { + f_T_in=f_B; + wallMomentumZ -= f_B+f_T_in; + } + + q = q_dirNE[k]; + if (q>=c0o1 && q<=c1o1) + { + f_SW_in=f_NE; + wallMomentumX += f_NE+f_SW_in; + wallMomentumY += f_NE+f_SW_in; + } + + q = q_dirSW[k]; + if (q>=c0o1 && q<=c1o1) + { + f_NE_in=f_SW; + wallMomentumX -= f_SW+f_NE_in; + wallMomentumY -= f_SW+f_NE_in; + } + + q = q_dirSE[k]; + if (q>=c0o1 && q<=c1o1) + { + f_NW_in=f_SE; + wallMomentumX += f_SE+f_NW_in; + wallMomentumY -= f_SE+f_NW_in; + } + + q = q_dirNW[k]; + if (q>=c0o1 && q<=c1o1) + { + f_SE_in=f_NW; + wallMomentumX -= f_NW+f_SE_in; + wallMomentumY += f_NW+f_SE_in; + } + + q = q_dirTE[k]; + if (q>=c0o1 && q<=c1o1) + { + f_BW_in=f_TE; + wallMomentumX += f_TE+f_BW_in; + wallMomentumZ += f_TE+f_BW_in; + } + + q = q_dirBW[k]; + if (q>=c0o1 && q<=c1o1) + { + f_TE_in=f_BW; + wallMomentumX -= f_BW+f_TE_in; + wallMomentumZ -= f_BW+f_TE_in; + } + + q = q_dirBE[k]; + if (q>=c0o1 && q<=c1o1) + { + f_TW_in=f_BE; + wallMomentumX += f_BE+f_TW_in; + wallMomentumZ -= f_BE+f_TW_in; + } + + q = q_dirTW[k]; + if (q>=c0o1 && q<=c1o1) + { + f_BE_in=f_TW; + wallMomentumX -= f_TW+f_BE_in; + wallMomentumZ += f_TW+f_BE_in; + } + + q = q_dirTN[k]; + if (q>=c0o1 && q<=c1o1) + { + f_BS_in=f_TN; + wallMomentumY += f_TN+f_BS_in; + wallMomentumZ += f_TN+f_BS_in; + } + + q = q_dirBS[k]; + if (q>=c0o1 && q<=c1o1) + { + f_TN_in=f_BS; + wallMomentumY -= f_BS+f_TN_in; + wallMomentumZ 
-= f_BS+f_TN_in; + } + + q = q_dirBN[k]; + if (q>=c0o1 && q<=c1o1) + { + f_TS_in=f_BN; + wallMomentumY += f_BN+f_TS_in; + wallMomentumZ -= f_BN+f_TS_in; + } + + q = q_dirTS[k]; + if (q>=c0o1 && q<=c1o1) + { + f_BN_in=f_TS; + wallMomentumY -= f_TS+f_BN_in; + wallMomentumZ += f_TS+f_BN_in; + } + + q = q_dirTNE[k]; + if (q>=c0o1 && q<=c1o1) + { + f_BSW_in=f_TNE; + wallMomentumX += f_TNE+f_BSW_in; + wallMomentumY += f_TNE+f_BSW_in; + wallMomentumZ += f_TNE+f_BSW_in; + } + + q = q_dirBSW[k]; + if (q>=c0o1 && q<=c1o1) + { + f_TNE_in=f_BSW; + wallMomentumX -= f_BSW+f_TNE_in; + wallMomentumY -= f_BSW+f_TNE_in; + wallMomentumZ -= f_BSW+f_TNE_in; + } + + q = q_dirBNE[k]; + if (q>=c0o1 && q<=c1o1) + { + f_TSW_in=f_BNE; + wallMomentumX += f_BNE+f_TSW_in; + wallMomentumY += f_BNE+f_TSW_in; + wallMomentumZ -= f_BNE+f_TSW_in; + } + + q = q_dirTSW[k]; + if (q>=c0o1 && q<=c1o1) + { + f_BNE_in=f_TSW; + wallMomentumX -= f_TSW+f_BNE_in; + wallMomentumY -= f_TSW+f_BNE_in; + wallMomentumZ += f_TSW+f_BNE_in; + } + + q = q_dirTSE[k]; + if (q>=c0o1 && q<=c1o1) + { + f_BNW_in=f_TSE; + wallMomentumX += f_TSE+f_BNW_in; + wallMomentumY -= f_TSE+f_BNW_in; + wallMomentumZ += f_TSE+f_BNW_in; + } + + q = q_dirBNW[k]; + if (q>=c0o1 && q<=c1o1) + { + f_TSE_in=f_BNW; + wallMomentumX -= f_BNW+f_TSE_in; + wallMomentumY += f_BNW+f_TSE_in; + wallMomentumZ -= f_BNW+f_TSE_in; + } + + q = q_dirBSE[k]; + if (q>=c0o1 && q<=c1o1) + { + f_TNW_in=f_BSE; + wallMomentumX += f_BSE+f_TNW_in; + wallMomentumY -= f_BSE+f_TNW_in; + wallMomentumZ -= f_BSE+f_TNW_in; + } + + q = q_dirTNW[k]; + if (q>=c0o1 && q<=c1o1) + { + f_BSE_in=f_TNW; + wallMomentumX -= f_TNW+f_BSE_in; + wallMomentumY += f_TNW+f_BSE_in; + wallMomentumZ += f_TNW+f_BSE_in; + } + + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + // //Compute wall velocity + // 
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + real VeloX=0.0, VeloY=0.0, VeloZ=0.0; + + q = 0.5f; + real eps = 0.001f; + + iMEM( k, k_N[k], + normalX, normalY, normalZ, + vx, vy, vz, + vx_el, vy_el, vz_el, + vx_w_mean, vy_w_mean, vz_w_mean, + vx1, vx2, vx3, + c1o1+drho, + samplingOffset, + q, + 1.0, + eps, + z0, + hasWallModelMonitor, + u_star_monitor, + wallMomentumX, wallMomentumY, wallMomentumZ, + VeloX, VeloY, VeloZ); + + // //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + // //Add wall velocity and write f's + // //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + + q = q_dirE[k]; + if (q>=c0o1 && q<=c1o1) + { + (D.f[dirW])[kw] = f_W_in - (c6o1*c2o27*( VeloX )); + wallMomentumX += -(c6o1*c2o27*( VeloX )); + } + + q = q_dirW[k]; + if (q>=c0o1 && q<=c1o1) + { + (D.f[dirE])[ke] = f_E_in - (c6o1*c2o27*(-VeloX )); + wallMomentumX -= - (c6o1*c2o27*(-VeloX )); + } + + q = q_dirN[k]; + if (q>=c0o1 && q<=c1o1) + { + (D.f[dirS])[ks] = f_S_in - (c6o1*c2o27*( VeloY )); + wallMomentumY += - (c6o1*c2o27*( VeloY )); + } + + q = q_dirS[k]; + if (q>=c0o1 && q<=c1o1) + { + (D.f[dirN])[kn] = f_N_in - (c6o1*c2o27*(-VeloY )); + wallMomentumY -= -(c6o1*c2o27*(-VeloY )); + } + + q = q_dirT[k]; + if (q>=c0o1 && q<=c1o1) + { + (D.f[dirB])[kb] = f_B_in - (c6o1*c2o27*( VeloZ )); + wallMomentumZ += - (c6o1*c2o27*( VeloZ )); + } + + q = q_dirB[k]; + if (q>=c0o1 && q<=c1o1) + { + (D.f[dirT])[kt] = f_T_in - (c6o1*c2o27*(-VeloZ )); + wallMomentumZ -= -(c6o1*c2o27*(-VeloZ )); + } + + q = q_dirNE[k]; + if (q>=c0o1 && q<=c1o1) + { + (D.f[dirSW])[ksw] = f_SW_in - (c6o1*c1o54*(VeloX+VeloY)); + wallMomentumX += -(c6o1*c1o54*(VeloX+VeloY)); + wallMomentumY += 
-(c6o1*c1o54*(VeloX+VeloY)); + } + + q = q_dirSW[k]; + if (q>=c0o1 && q<=c1o1) + { + (D.f[dirNE])[kne] = f_NE_in - (c6o1*c1o54*(-VeloX-VeloY)); + wallMomentumX -= - (c6o1*c1o54*(-VeloX-VeloY)); + wallMomentumY -= - (c6o1*c1o54*(-VeloX-VeloY)); + } + + q = q_dirSE[k]; + if (q>=c0o1 && q<=c1o1) + { + (D.f[dirNW])[knw] = f_NW_in - (c6o1*c1o54*( VeloX-VeloY)); + wallMomentumX += -(c6o1*c1o54*( VeloX-VeloY)); + wallMomentumY -= -(c6o1*c1o54*( VeloX-VeloY)); + } + + q = q_dirNW[k]; + if (q>=c0o1 && q<=c1o1) + { + (D.f[dirSE])[kse] = f_SE_in - (c6o1*c1o54*(-VeloX+VeloY)); + wallMomentumX -= - (c6o1*c1o54*(-VeloX+VeloY)); + wallMomentumY += - (c6o1*c1o54*(-VeloX+VeloY)); + } + + q = q_dirTE[k]; + if (q>=c0o1 && q<=c1o1) + { + (D.f[dirBW])[kbw] = f_BW_in - (c6o1*c1o54*( VeloX+VeloZ)); + wallMomentumX += - (c6o1*c1o54*( VeloX+VeloZ)); + wallMomentumZ += - (c6o1*c1o54*( VeloX+VeloZ)); + } + + q = q_dirBW[k]; + if (q>=c0o1 && q<=c1o1) + { + (D.f[dirTE])[kte] = f_TE_in - (c6o1*c1o54*(-VeloX-VeloZ)); + wallMomentumX -= - (c6o1*c1o54*(-VeloX-VeloZ)); + wallMomentumZ -= - (c6o1*c1o54*(-VeloX-VeloZ)); + } + + q = q_dirBE[k]; + if (q>=c0o1 && q<=c1o1) + { + (D.f[dirTW])[ktw] = f_TW_in - (c6o1*c1o54*( VeloX-VeloZ)); + wallMomentumX += - (c6o1*c1o54*( VeloX-VeloZ)); + wallMomentumZ -= - (c6o1*c1o54*( VeloX-VeloZ)); + } + + q = q_dirTW[k]; + if (q>=c0o1 && q<=c1o1) + { + (D.f[dirBE])[kbe] = f_BE_in - (c6o1*c1o54*(-VeloX+VeloZ)); + wallMomentumX -= - (c6o1*c1o54*(-VeloX+VeloZ)); + wallMomentumZ += - (c6o1*c1o54*(-VeloX+VeloZ)); + } + + q = q_dirTN[k]; + if (q>=c0o1 && q<=c1o1) + { + (D.f[dirBS])[kbs] = f_BS_in - (c6o1*c1o54*( VeloY+VeloZ)); + wallMomentumY += - (c6o1*c1o54*( VeloY+VeloZ)); + wallMomentumZ += - (c6o1*c1o54*( VeloY+VeloZ)); + } + + q = q_dirBS[k]; + if (q>=c0o1 && q<=c1o1) + { + (D.f[dirTN])[ktn] = f_TN_in - (c6o1*c1o54*( -VeloY-VeloZ)); + wallMomentumY -= - (c6o1*c1o54*( -VeloY-VeloZ)); + wallMomentumZ -= - (c6o1*c1o54*( -VeloY-VeloZ)); + } + + q = q_dirBN[k]; + if 
(q>=c0o1 && q<=c1o1) + { + (D.f[dirTS])[kts] = f_TS_in - (c6o1*c1o54*( VeloY-VeloZ)); + wallMomentumY += - (c6o1*c1o54*( VeloY-VeloZ)); + wallMomentumZ -= - (c6o1*c1o54*( VeloY-VeloZ)); + } + + q = q_dirTS[k]; + if (q>=c0o1 && q<=c1o1) + { + (D.f[dirBN])[kbn] = f_BN_in - (c6o1*c1o54*( -VeloY+VeloZ)); + wallMomentumY -= - (c6o1*c1o54*( -VeloY+VeloZ)); + wallMomentumZ += - (c6o1*c1o54*( -VeloY+VeloZ)); + } + + q = q_dirTNE[k]; + if (q>=c0o1 && q<=c1o1) + { + (D.f[dirBSW])[kbsw] = f_BSW_in - (c6o1*c1o216*( VeloX+VeloY+VeloZ)); + wallMomentumX += - (c6o1*c1o216*( VeloX+VeloY+VeloZ)); + wallMomentumY += - (c6o1*c1o216*( VeloX+VeloY+VeloZ)); + wallMomentumZ += - (c6o1*c1o216*( VeloX+VeloY+VeloZ)); + } + + q = q_dirBSW[k]; + if (q>=c0o1 && q<=c1o1) + { + (D.f[dirTNE])[ktne] = f_TNE_in - (c6o1*c1o216*(-VeloX-VeloY-VeloZ)); + wallMomentumX -= - (c6o1*c1o216*(-VeloX-VeloY-VeloZ)); + wallMomentumY -= - (c6o1*c1o216*(-VeloX-VeloY-VeloZ)); + wallMomentumZ -= - (c6o1*c1o216*(-VeloX-VeloY-VeloZ)); + } + + q = q_dirBNE[k]; + if (q>=c0o1 && q<=c1o1) + { + (D.f[dirTSW])[ktsw] = f_TSW_in - (c6o1*c1o216*( VeloX+VeloY-VeloZ)); + wallMomentumX += - (c6o1*c1o216*( VeloX+VeloY-VeloZ)); + wallMomentumY += - (c6o1*c1o216*( VeloX+VeloY-VeloZ)); + wallMomentumZ -= - (c6o1*c1o216*( VeloX+VeloY-VeloZ)); + } + + q = q_dirTSW[k]; + if (q>=c0o1 && q<=c1o1) + { + (D.f[dirBNE])[kbne] = f_BNE_in - (c6o1*c1o216*(-VeloX-VeloY+VeloZ)); + wallMomentumX -= - (c6o1*c1o216*(-VeloX-VeloY+VeloZ)); + wallMomentumY -= - (c6o1*c1o216*(-VeloX-VeloY+VeloZ)); + wallMomentumZ += - (c6o1*c1o216*(-VeloX-VeloY+VeloZ)); + } + + q = q_dirTSE[k]; + if (q>=c0o1 && q<=c1o1) + { + (D.f[dirBNW])[kbnw] = f_BNW_in - (c6o1*c1o216*( VeloX-VeloY+VeloZ)); + wallMomentumX += - (c6o1*c1o216*( VeloX-VeloY+VeloZ)); + wallMomentumY -= - (c6o1*c1o216*( VeloX-VeloY+VeloZ)); + wallMomentumZ += - (c6o1*c1o216*( VeloX-VeloY+VeloZ)); + } + + q = q_dirBNW[k]; + if (q>=c0o1 && q<=c1o1) + { + (D.f[dirTSE])[ktse] = f_TSE_in - 
(c6o1*c1o216*(-VeloX+VeloY-VeloZ)); + wallMomentumX -= - (c6o1*c1o216*(-VeloX+VeloY-VeloZ)); + wallMomentumY += - (c6o1*c1o216*(-VeloX+VeloY-VeloZ)); + wallMomentumZ -= - (c6o1*c1o216*(-VeloX+VeloY-VeloZ)); + } + + q = q_dirBSE[k]; + if (q>=c0o1 && q<=c1o1) + { + (D.f[dirTNW])[ktnw] = f_TNW_in - (c6o1*c1o216*( VeloX-VeloY-VeloZ)); + wallMomentumX += - (c6o1*c1o216*( VeloX-VeloY-VeloZ)); + wallMomentumY -= - (c6o1*c1o216*( VeloX-VeloY-VeloZ)); + wallMomentumZ -= - (c6o1*c1o216*( VeloX-VeloY-VeloZ)); + } + + q = q_dirTNW[k]; + if (q>=c0o1 && q<=c1o1) + { + (D.f[dirBSE])[kbse] = f_BSE_in - (c6o1*c1o216*(-VeloX+VeloY+VeloZ)); + wallMomentumX -= - (c6o1*c1o216*(-VeloX+VeloY+VeloZ)); + wallMomentumY += - (c6o1*c1o216*(-VeloX+VeloY+VeloZ)); + wallMomentumZ += - (c6o1*c1o216*(-VeloX+VeloY+VeloZ)); + } + + if(hasWallModelMonitor) + { + Fx_monitor[k] = wallMomentumX; + Fy_monitor[k] = wallMomentumY; + Fz_monitor[k] = wallMomentumZ; + } + + } +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// \ No newline at end of file diff --git a/src/gpu/VirtualFluids_GPU/GPU/TurbulentViscosity.cu b/src/gpu/VirtualFluids_GPU/GPU/TurbulentViscosity.cu index b232ca7ef22d607420cc4cbbfb39cccb41618868..d510a4fe6f0f842d7882bef2eb4804461e986026 100644 --- a/src/gpu/VirtualFluids_GPU/GPU/TurbulentViscosity.cu +++ b/src/gpu/VirtualFluids_GPU/GPU/TurbulentViscosity.cu @@ -69,7 +69,7 @@ extern "C" __global__ void calcAMD(real* vx, (dvxdx*dvzdx + dvxdy*dvzdy + dvxdz*dvzdz) * (dvxdz+dvzdx) + (dvydx*dvzdx + dvydy*dvzdy + dvydz*dvzdz) * (dvydz+dvzdy); - turbulentViscosity[k] = -SGSConstant*enumerator/denominator; + turbulentViscosity[k] = max(c0o1,-SGSConstant*enumerator)/denominator; } extern "C" void calcTurbulentViscosityAMD(Parameter* para, int level) @@ -88,5 +88,6 @@ extern "C" void calcTurbulentViscosityAMD(Parameter* para, int level) para->getParD(level)->size_Mat_SP, para->getSGSConstant() ); + 
getLastCudaError("calcAMD execution failed"); } \ No newline at end of file diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/TurbulentViscosityKernels/FluidFlow/Compressible/CumulantK17chim/TurbulentViscosityCumulantK17CompChim_Device.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/TurbulentViscosityKernels/FluidFlow/Compressible/CumulantK17chim/TurbulentViscosityCumulantK17CompChim_Device.cu index b457a782e5aa4922b298ed1500c31b230950cd6b..f2f02c6df050166259dc23f816b0c2829f85dc0c 100644 --- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/TurbulentViscosityKernels/FluidFlow/Compressible/CumulantK17chim/TurbulentViscosityCumulantK17CompChim_Device.cu +++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/TurbulentViscosityKernels/FluidFlow/Compressible/CumulantK17chim/TurbulentViscosityCumulantK17CompChim_Device.cu @@ -26,9 +26,17 @@ // You should have received a copy of the GNU General Public License along // with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>. // -//! \file Cumulant27chim.cu -//! \ingroup GPU -//! \author Martin Schoenherr +//! \file TurbulentViscosityCumulantK17CompChim_Device.cu +//! \author Henry Korb, Henrik Asmuth +//! \date 16/05/2022 +//! \brief CumulantK17CompChim kernel by Martin Schönherr that includes turbulent viscosity and other small mods. +//! +//! Additions to CumulantK17CompChim: +//! - can incorporate local body force +//! - when applying a local body force, the total round-off error of forcing+bodyforce is saved and added in the next time step +//! - uses turbulent viscosity that is computed in separate kernel (as of now AMD) +//! - saves macroscopic values (needed for instance for probes, AMD, and actuator models) +//! 
//======================================================================================= /* Device code */ #include "LBM/LB.h" @@ -227,19 +235,46 @@ extern "C" __global__ void LB_Kernel_TurbulentViscosityCumulantK17CompChim( real fz = forces[2]; if( bodyForce ){ - fx += bodyForceX[k]; + fx += bodyForceX[k]; fy += bodyForceY[k]; fz += bodyForceZ[k]; - //Reset body force - bodyForceX[k] = 0.0f; - bodyForceY[k] = 0.0f; - bodyForceZ[k] = 0.0f; + real vx = vvx; + real vy = vvy; + real vz = vvz; + real acc_x = fx * c1o2 / factor; + real acc_y = fy * c1o2 / factor; + real acc_z = fz * c1o2 / factor; + + vvx += acc_x; + vvy += acc_y; + vvz += acc_z; + + // // Reset body force. To be used when not using round-off correction. + // bodyForceX[k] = 0.0f; + // bodyForceY[k] = 0.0f; + // bodyForceZ[k] = 0.0f; + + //////////////////////////////////////////////////////////////////////////////////// + //!> Round-off correction + //! + //!> Similar to Kahan summation algorithm (https://en.wikipedia.org/wiki/Kahan_summation_algorithm) + //!> Essentially computes the round-off error of the applied force and adds it in the next time step as a compensation. + //!> Seems to be necesseary at very high Re boundary layers, where the forcing and velocity can + //!> differ by several orders of magnitude. + //!> \note 16/05/2022: Testing, still ongoing! + //! + bodyForceX[k] = (acc_x-(vvx-vx))*factor*c2o1; + bodyForceY[k] = (acc_y-(vvy-vy))*factor*c2o1; + bodyForceZ[k] = (acc_z-(vvz-vz))*factor*c2o1; + } + else{ + vvx += fx * c1o2 / factor; + vvy += fy * c1o2 / factor; + vvz += fz * c1o2 / factor; } - vvx += fx * c1o2 / factor; - vvy += fy * c1o2 / factor; - vvz += fz * c1o2 / factor; + //////////////////////////////////////////////////////////////////////////////////// // calculate the square of velocities for this lattice node real vx2 = vvx * vvx; @@ -315,10 +350,10 @@ extern "C" __global__ void LB_Kernel_TurbulentViscosityCumulantK17CompChim( //! 
- Fifth order cumulants \f$ C_{221}, C_{212}, C_{122}\f$: \f$\omega_9=O5=1.0\f$. //! - Sixth order cumulant \f$ C_{222}\f$: \f$\omega_{10}=O6=1.0\f$. //! - //////////////////////////////////////////////////////////////////////////////////// + //////////////////////////////////////////////////////////////////////////////////// //! - Calculate modified omega with turbulent viscosity //! - real omega = omega_in / (c1o1 + c3o1*omega_in*max(c0o1, turbulentViscosity[k])); + real omega = omega_in / (c1o1 + c3o1*omega_in*turbulentViscosity[k]); //////////////////////////////////////////////////////////// // 2. real OxxPyyPzz = c1o1; @@ -429,6 +464,24 @@ extern "C" __global__ void LB_Kernel_TurbulentViscosityCumulantK17CompChim( real dxux = c1o2 * (-omega) * (mxxMyy + mxxMzz) + c1o2 * OxxPyyPzz * (mfaaa - mxxPyyPzz); real dyuy = dxux + omega * c3o2 * mxxMyy; real dzuz = dxux + omega * c3o2 * mxxMzz; + + //Smagorinsky for debugging + // if(true) + // { + // if(false && k==99976) + // { + // printf("dudz+dwdu: \t %1.14f \n", Dxz ); + // printf("dvdz+dudy: \t %1.14f \n", Dxy ); + // printf("dwdy+dvdz: \t %1.14f \n", Dyz ); + // printf("nu_t * dudz+dwdu: \t %1.14f \n", turbulentViscosity[k]*Dxz ); + // printf("nu_t * dvdz+dudy: \t %1.14f \n", turbulentViscosity[k]*Dxy ); + // printf("nu_t * dwdy+dvdz: \t %1.14f \n", turbulentViscosity[k]*Dyz ); + // } + // real Sbar = sqrt(c2o1*(dxux*dxux+dyuy*dyuy+dzuz*dzuz)+Dxy*Dxy+Dxz*Dxz+Dyz*Dyz); + // real Cs = 0.08f; + // turbulentViscosity[k] = Cs*Cs*Sbar; + // } + //////////////////////////////////////////////////////////// //! - Relaxation of second order cumulants with correction terms according to Eq. (33)-(35) in //! <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017), @@ -670,4 +723,875 @@ extern "C" __global__ void LB_Kernel_TurbulentViscosityCumulantK17CompChim( } -} \ No newline at end of file +} + + + + +//WORK IN PROGRESS: Incorporating DistributionWrapper in kernel..... 
+ +// //======================================================================================= +// // ____ ____ __ ______ __________ __ __ __ __ +// // \ \ | | | | | _ \ |___ ___| | | | | / \ | | +// // \ \ | | | | | |_) | | | | | | | / \ | | +// // \ \ | | | | | _ / | | | | | | / /\ \ | | +// // \ \ | | | | | | \ \ | | | \__/ | / ____ \ | |____ +// // \ \ | | |__| |__| \__\ |__| \________/ /__/ \__\ |_______| +// // \ \ | | ________________________________________________________________ +// // \ \ | | | ______________________________________________________________| +// // \ \| | | | __ __ __ __ ______ _______ +// // \ | | |_____ | | | | | | | | | _ \ / _____) +// // \ | | _____| | | | | | | | | | | \ \ \_______ +// // \ | | | | |_____ | \_/ | | | | |_/ / _____ | +// // \ _____| |__| |________| \_______/ |__| |______/ (_______/ +// // +// // This file is part of VirtualFluids. VirtualFluids is free software: you can +// // redistribute it and/or modify it under the terms of the GNU General Public +// // License as published by the Free Software Foundation, either version 3 of +// // the License, or (at your option) any later version. +// // +// // VirtualFluids is distributed in the hope that it will be useful, but WITHOUT +// // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// // FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// // for more details. +// // +// // You should have received a copy of the GNU General Public License along +// // with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>. +// // +// //! \file TurbulentViscosityCumulantK17CompChim_Device.cu +// //! \author Henry Korb, Henrik Asmuth +// //! \date 16/05/2022 +// //! \brief CumulantK17CompChim kernel by Martin Schönherr that inlcudes turbulent viscosity and other small mods. +// //! +// //! Additions to CumulantK17CompChim: +// //! - can incorporate local body force +// //! 
- when applying a local body force, the total round of error of forcing+bodyforce is saved and added in next time step +// //! - uses turbulent viscosity that is computed in separate kernel (as of now AMD) +// //! - saves macroscopic values (needed for instance for probes, AMD, and actuator models) +// //! +// //======================================================================================= +// /* Device code */ +// #include "LBM/LB.h" +// #include "LBM/D3Q27.h" +// #include <lbm/constants/NumericConstants.h> + +// using namespace vf::lbm::constant; +// #include "Kernel/ChimeraTransformation.h" + +// #include "Kernel/Utilities/DistributionHelper.cuh" + +// #include "lbm/MacroscopicQuantities.h" + +// //////////////////////////////////////////////////////////////////////////////// +// extern "C" __global__ void LB_Kernel_TurbulentViscosityCumulantK17CompChim( +// real omega_in, +// uint* typeOfGridNode, +// uint* neighborX, +// uint* neighborY, +// uint* neighborZ, +// real* distributions, +// real* rho, +// real* vx, +// real* vy, +// real* vz, +// real* turbulentViscosity, +// unsigned long size_Mat, +// int level, +// bool bodyForce, +// real* forces, +// real* bodyForceX, +// real* bodyForceY, +// real* bodyForceZ, +// real* quadricLimiters, +// bool isEvenTimestep) +// { +// ////////////////////////////////////////////////////////////////////////// +// //! Cumulant K17 Kernel is based on \ref +// //! <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017), DOI:10.1016/j.jcp.2017.05.040 +// //! ]</b></a> and \ref <a href="https://doi.org/10.1016/j.jcp.2017.07.004"><b>[ M. Geier et al. (2017), +// //! DOI:10.1016/j.jcp.2017.07.004 ]</b></a> +// //! +// //! The cumulant kernel is executed in the following steps +// //! +// //////////////////////////////////////////////////////////////////////////////// +// //! - Get node index coordinates from threadIdx, blockIdx, blockDim and gridDim. +// //! 
+ +// // const unsigned x = threadIdx.x; +// // const unsigned y = blockIdx.x; +// // const unsigned z = blockIdx.y; + +// // const unsigned nx = blockDim.x; +// // const unsigned ny = gridDim.x; + +// // const unsigned k = nx * (ny * z + y) + x; +// const unsigned k = vf::gpu::getNodeIndex(); +// ////////////////////////////////////////////////////////////////////////// +// // run for all indices in size_Mat and fluid nodes +// // if ((k < size_Mat) && (typeOfGridNode[k] == GEO_FLUID)) { +// if ((k < size_Mat) && vf::gpu::isValidFluidNode(typeOfGridNode[k])) { +// ////////////////////////////////////////////////////////////////////////// +// //! - Read distributions: style of reading and writing the distributions from/to stored arrays dependent on +// //! timestep is based on the esoteric twist algorithm \ref <a +// //! href="https://doi.org/10.3390/computation5020019"><b>[ M. Geier et al. (2017), +// //! DOI:10.3390/computation5020019 ]</b></a> +// //! + +// vf::gpu::DistributionWrapper distr_wrapper( distributions, size_Mat, +// isEvenTimestep, k, +// neighborX, neighborY, neighborZ); + +// Distributions27 dist; +// if (isEvenTimestep) { +// dist.f[dirE] = &distributions[dirE * size_Mat]; +// dist.f[dirW] = &distributions[dirW * size_Mat]; +// dist.f[dirN] = &distributions[dirN * size_Mat]; +// dist.f[dirS] = &distributions[dirS * size_Mat]; +// dist.f[dirT] = &distributions[dirT * size_Mat]; +// dist.f[dirB] = &distributions[dirB * size_Mat]; +// dist.f[dirNE] = &distributions[dirNE * size_Mat]; +// dist.f[dirSW] = &distributions[dirSW * size_Mat]; +// dist.f[dirSE] = &distributions[dirSE * size_Mat]; +// dist.f[dirNW] = &distributions[dirNW * size_Mat]; +// dist.f[dirTE] = &distributions[dirTE * size_Mat]; +// dist.f[dirBW] = &distributions[dirBW * size_Mat]; +// dist.f[dirBE] = &distributions[dirBE * size_Mat]; +// dist.f[dirTW] = &distributions[dirTW * size_Mat]; +// dist.f[dirTN] = &distributions[dirTN * size_Mat]; +// dist.f[dirBS] = &distributions[dirBS * 
size_Mat]; +// dist.f[dirBN] = &distributions[dirBN * size_Mat]; +// dist.f[dirTS] = &distributions[dirTS * size_Mat]; +// dist.f[dirZERO] = &distributions[dirZERO * size_Mat]; +// dist.f[dirTNE] = &distributions[dirTNE * size_Mat]; +// dist.f[dirTSW] = &distributions[dirTSW * size_Mat]; +// dist.f[dirTSE] = &distributions[dirTSE * size_Mat]; +// dist.f[dirTNW] = &distributions[dirTNW * size_Mat]; +// dist.f[dirBNE] = &distributions[dirBNE * size_Mat]; +// dist.f[dirBSW] = &distributions[dirBSW * size_Mat]; +// dist.f[dirBSE] = &distributions[dirBSE * size_Mat]; +// dist.f[dirBNW] = &distributions[dirBNW * size_Mat]; +// } else { +// dist.f[dirW] = &distributions[dirE * size_Mat]; +// dist.f[dirE] = &distributions[dirW * size_Mat]; +// dist.f[dirS] = &distributions[dirN * size_Mat]; +// dist.f[dirN] = &distributions[dirS * size_Mat]; +// dist.f[dirB] = &distributions[dirT * size_Mat]; +// dist.f[dirT] = &distributions[dirB * size_Mat]; +// dist.f[dirSW] = &distributions[dirNE * size_Mat]; +// dist.f[dirNE] = &distributions[dirSW * size_Mat]; +// dist.f[dirNW] = &distributions[dirSE * size_Mat]; +// dist.f[dirSE] = &distributions[dirNW * size_Mat]; +// dist.f[dirBW] = &distributions[dirTE * size_Mat]; +// dist.f[dirTE] = &distributions[dirBW * size_Mat]; +// dist.f[dirTW] = &distributions[dirBE * size_Mat]; +// dist.f[dirBE] = &distributions[dirTW * size_Mat]; +// dist.f[dirBS] = &distributions[dirTN * size_Mat]; +// dist.f[dirTN] = &distributions[dirBS * size_Mat]; +// dist.f[dirTS] = &distributions[dirBN * size_Mat]; +// dist.f[dirBN] = &distributions[dirTS * size_Mat]; +// dist.f[dirZERO] = &distributions[dirZERO * size_Mat]; +// dist.f[dirBSW] = &distributions[dirTNE * size_Mat]; +// dist.f[dirBNE] = &distributions[dirTSW * size_Mat]; +// dist.f[dirBNW] = &distributions[dirTSE * size_Mat]; +// dist.f[dirBSE] = &distributions[dirTNW * size_Mat]; +// dist.f[dirTSW] = &distributions[dirBNE * size_Mat]; +// dist.f[dirTNE] = &distributions[dirBSW * size_Mat]; +// 
dist.f[dirTNW] = &distributions[dirBSE * size_Mat]; +// dist.f[dirTSE] = &distributions[dirBNW * size_Mat]; +// } +// //////////////////////////////////////////////////////////////////////////////// +// //! - Set neighbor indices (necessary for indirect addressing) +// uint kw = neighborX[k]; +// uint ks = neighborY[k]; +// uint kb = neighborZ[k]; +// uint ksw = neighborY[kw]; +// uint kbw = neighborZ[kw]; +// uint kbs = neighborZ[ks]; +// uint kbsw = neighborZ[ksw]; +// //////////////////////////////////////////////////////////////////////////////////// +// //! - Set local distributions +// //! + +// // real mfcbb = distr_wrapper.distribution.f[dirE]; +// // real mfabb = distr_wrapper.distribution.f[dirW]; +// // real mfbcb = distr_wrapper.distribution.f[dirN]; +// // real mfbab = distr_wrapper.distribution.f[dirS]; +// // real mfbbc = distr_wrapper.distribution.f[dirT]; +// // real mfbba = distr_wrapper.distribution.f[dirB]; +// // real mfccb = distr_wrapper.distribution.f[dirNE]; +// // real mfaab = distr_wrapper.distribution.f[dirSW]; +// // real mfcab = distr_wrapper.distribution.f[dirSE]; +// // real mfacb = distr_wrapper.distribution.f[dirNW]; +// // real mfcbc = distr_wrapper.distribution.f[dirTE]; +// // real mfaba = distr_wrapper.distribution.f[dirBW]; +// // real mfcba = distr_wrapper.distribution.f[dirBE]; +// // real mfabc = distr_wrapper.distribution.f[dirTW]; +// // real mfbcc = distr_wrapper.distribution.f[dirTN]; +// // real mfbaa = distr_wrapper.distribution.f[dirBS]; +// // real mfbca = distr_wrapper.distribution.f[dirBN]; +// // real mfbac = distr_wrapper.distribution.f[dirTS]; +// // real mfbbb = distr_wrapper.distribution.f[dirZERO]; +// // real mfccc = distr_wrapper.distribution.f[dirTNE]; +// // real mfaac = distr_wrapper.distribution.f[dirTSW]; +// // real mfcac = distr_wrapper.distribution.f[dirTSE]; +// // real mfacc = distr_wrapper.distribution.f[dirTNW]; +// // real mfcca = distr_wrapper.distribution.f[dirBNE]; +// // real mfaaa = 
distr_wrapper.distribution.f[dirBSW]; +// // real mfcaa = distr_wrapper.distribution.f[dirBSE]; +// // real mfaca = distr_wrapper.distribution.f[dirBNW]; + + +// real mfcbb = (dist.f[dirE])[k]; +// real mfabb = (dist.f[dirW])[kw]; +// real mfbcb = (dist.f[dirN])[k]; +// real mfbab = (dist.f[dirS])[ks]; +// real mfbbc = (dist.f[dirT])[k]; +// real mfbba = (dist.f[dirB])[kb]; +// real mfccb = (dist.f[dirNE])[k]; +// real mfaab = (dist.f[dirSW])[ksw]; +// real mfcab = (dist.f[dirSE])[ks]; +// real mfacb = (dist.f[dirNW])[kw]; +// real mfcbc = (dist.f[dirTE])[k]; +// real mfaba = (dist.f[dirBW])[kbw]; +// real mfcba = (dist.f[dirBE])[kb]; +// real mfabc = (dist.f[dirTW])[kw]; +// real mfbcc = (dist.f[dirTN])[k]; +// real mfbaa = (dist.f[dirBS])[kbs]; +// real mfbca = (dist.f[dirBN])[kb]; +// real mfbac = (dist.f[dirTS])[ks]; +// real mfbbb = (dist.f[dirZERO])[k]; +// real mfccc = (dist.f[dirTNE])[k]; +// real mfaac = (dist.f[dirTSW])[ksw]; +// real mfcac = (dist.f[dirTSE])[ks]; +// real mfacc = (dist.f[dirTNW])[kw]; +// real mfcca = (dist.f[dirBNE])[kb]; +// real mfaaa = (dist.f[dirBSW])[kbsw]; +// real mfcaa = (dist.f[dirBSE])[kbs]; +// real mfaca = (dist.f[dirBNW])[kbw]; + +// //////////////////////////////////////////////////////(unsigned long)////////////////////////////// +// //! - Calculate density and velocity using pyramid summation for low round-off errors as in Eq. (J1)-(J3) \ref +// //! <a href="https://doi.org/10.1016/j.camwa.2015.05.001"><b>[ M. Geier et al. (2015), +// //! DOI:10.1016/j.camwa.2015.05.001 ]</b></a> +// //! 
+// // real drho = ((((mfccc + mfaaa) + (mfaca + mfcac)) + ((mfacc + mfcaa) + (mfaac + mfcca))) + +// // (((mfbac + mfbca) + (mfbaa + mfbcc)) + ((mfabc + mfcba) + (mfaba + mfcbc)) + +// // ((mfacb + mfcab) + (mfaab + mfccb))) + +// // ((mfabb + mfcbb) + (mfbab + mfbcb) + (mfbba + mfbbc))) + +// // mfbbb; +// real drho = vf::lbm::getDensity(distr_wrapper.distribution.f); + +// real rrho = c1o1 + drho; +// real OOrho = c1o1 / rrho; + +// // real vvx = ((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfcaa - mfacc) + (mfcca - mfaac))) + +// // (((mfcba - mfabc) + (mfcbc - mfaba)) + ((mfcab - mfacb) + (mfccb - mfaab))) + (mfcbb - mfabb)) * +// // OOrho; +// real vvx = vf::lbm::getCompressibleVelocityX1(distr_wrapper.distribution.f, drho); +// // real vvy = ((((mfccc - mfaaa) + (mfaca - mfcac)) + ((mfacc - mfcaa) + (mfcca - mfaac))) + +// // (((mfbca - mfbac) + (mfbcc - mfbaa)) + ((mfacb - mfcab) + (mfccb - mfaab))) + (mfbcb - mfbab)) * +// // OOrho; +// real vvy = vf::lbm::getCompressibleVelocityX2(distr_wrapper.distribution.f, drho); +// // real vvz = ((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfacc - mfcaa) + (mfaac - mfcca))) + +// // (((mfbac - mfbca) + (mfbcc - mfbaa)) + ((mfabc - mfcba) + (mfcbc - mfaba))) + (mfbbc - mfbba)) * +// // OOrho; +// real vvz = vf::lbm::getCompressibleVelocityX3(distr_wrapper.distribution.f, drho); +// // if(k==100000){printf("%f \t %f \t%f \t%f \n\n", drho, vvx, vvz, vvy);} +// //////////////////////////////////////////////////////////////////////////////////// +// //! - Add half of the acceleration (body force) to the velocity as in Eq. (42) \ref +// //! <a href="https://doi.org/10.1016/j.camwa.2015.05.001"><b>[ M. Geier et al. (2015), +// //! DOI:10.1016/j.camwa.2015.05.001 ]</b></a> +// //! 
+// real factor = c1o1; +// for (size_t i = 1; i <= level; i++) { +// factor *= c2o1; +// } + +// real fx = forces[0]; +// real fy = forces[1]; +// real fz = forces[2]; + +// if( bodyForce ){ +// fx += bodyForceX[k]; +// fy += bodyForceY[k]; +// fz += bodyForceZ[k]; + +// real vx = vvx; +// real vy = vvy; +// real vz = vvz; +// real acc_x = fx * c1o2 / factor; +// real acc_y = fy * c1o2 / factor; +// real acc_z = fz * c1o2 / factor; + +// vvx += acc_x; +// vvy += acc_y; +// vvz += acc_z; + +// // // Reset body force. To be used when not using round-off correction. +// // bodyForceX[k] = 0.0f; +// // bodyForceY[k] = 0.0f; +// // bodyForceZ[k] = 0.0f; + +// //////////////////////////////////////////////////////////////////////////////////// +// //!> Round-off correction +// //! +// //!> Similar to Kahan summation algorithm (https://en.wikipedia.org/wiki/Kahan_summation_algorithm) +// //!> Essentially computes the round-off error of the applied force and adds it in the next time step as a compensation. +// //!> Seems to be necesseary at very high Re boundary layers, where the forcing and velocity can +// //!> differ by several orders of magnitude. +// //!> \note 16/05/2022: Testing, still ongoing! +// //! +// bodyForceX[k] = (acc_x-(double)(vvx-vx))*factor*c2o1; +// bodyForceY[k] = (acc_y-(double)(vvy-vy))*factor*c2o1; +// bodyForceZ[k] = (acc_z-(double)(vvz-vz))*factor*c2o1; + +// } +// else{ +// vvx += fx * c1o2 / factor; +// vvy += fy * c1o2 / factor; +// vvz += fz * c1o2 / factor; +// } + + +// //////////////////////////////////////////////////////////////////////////////////// +// // calculate the square of velocities for this lattice node +// real vx2 = vvx * vvx; +// real vy2 = vvy * vvy; +// real vz2 = vvz * vvz; +// //////////////////////////////////////////////////////////////////////////////////// +// //! - Set relaxation limiters for third order cumulants to default value \f$ \lambda=0.001 \f$ according to +// //! 
section 6 in \ref <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017), +// //! DOI:10.1016/j.jcp.2017.05.040 ]</b></a> +// //! +// real wadjust; +// real qudricLimitP = quadricLimiters[0]; +// real qudricLimitM = quadricLimiters[1]; +// real qudricLimitD = quadricLimiters[2]; +// //////////////////////////////////////////////////////////////////////////////////// +// //! - Chimera transform from well conditioned distributions to central moments as defined in Appendix J in \ref +// //! <a href="https://doi.org/10.1016/j.camwa.2015.05.001"><b>[ M. Geier et al. (2015), +// //! DOI:10.1016/j.camwa.2015.05.001 ]</b></a> see also Eq. (6)-(14) in \ref <a +// //! href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017), DOI:10.1016/j.jcp.2017.05.040 +// //! ]</b></a> +// //! +// //////////////////////////////////////////////////////////////////////////////////// +// // Z - Dir +// forwardInverseChimeraWithK(mfaaa, mfaab, mfaac, vvz, vz2, c36o1, c1o36); +// forwardInverseChimeraWithK(mfaba, mfabb, mfabc, vvz, vz2, c9o1, c1o9); +// forwardInverseChimeraWithK(mfaca, mfacb, mfacc, vvz, vz2, c36o1, c1o36); +// forwardInverseChimeraWithK(mfbaa, mfbab, mfbac, vvz, vz2, c9o1, c1o9); +// forwardInverseChimeraWithK(mfbba, mfbbb, mfbbc, vvz, vz2, c9o4, c4o9); +// forwardInverseChimeraWithK(mfbca, mfbcb, mfbcc, vvz, vz2, c9o1, c1o9); +// forwardInverseChimeraWithK(mfcaa, mfcab, mfcac, vvz, vz2, c36o1, c1o36); +// forwardInverseChimeraWithK(mfcba, mfcbb, mfcbc, vvz, vz2, c9o1, c1o9); +// forwardInverseChimeraWithK(mfcca, mfccb, mfccc, vvz, vz2, c36o1, c1o36); + +// //////////////////////////////////////////////////////////////////////////////////// +// // Y - Dir +// forwardInverseChimeraWithK(mfaaa, mfaba, mfaca, vvy, vy2, c6o1, c1o6); +// forwardChimera(mfaab, mfabb, mfacb, vvy, vy2); +// forwardInverseChimeraWithK(mfaac, mfabc, mfacc, vvy, vy2, c18o1, c1o18); +// forwardInverseChimeraWithK(mfbaa, mfbba, mfbca, vvy, vy2, c3o2, c2o3); 
+// forwardChimera(mfbab, mfbbb, mfbcb, vvy, vy2); +// forwardInverseChimeraWithK(mfbac, mfbbc, mfbcc, vvy, vy2, c9o2, c2o9); +// forwardInverseChimeraWithK(mfcaa, mfcba, mfcca, vvy, vy2, c6o1, c1o6); +// forwardChimera(mfcab, mfcbb, mfccb, vvy, vy2); +// forwardInverseChimeraWithK(mfcac, mfcbc, mfccc, vvy, vy2, c18o1, c1o18); + +// //////////////////////////////////////////////////////////////////////////////////// +// // X - Dir +// forwardInverseChimeraWithK(mfaaa, mfbaa, mfcaa, vvx, vx2, c1o1, c1o1); +// forwardChimera(mfaba, mfbba, mfcba, vvx, vx2); +// forwardInverseChimeraWithK(mfaca, mfbca, mfcca, vvx, vx2, c3o1, c1o3); +// forwardChimera(mfaab, mfbab, mfcab, vvx, vx2); +// forwardChimera(mfabb, mfbbb, mfcbb, vvx, vx2); +// forwardChimera(mfacb, mfbcb, mfccb, vvx, vx2); +// forwardInverseChimeraWithK(mfaac, mfbac, mfcac, vvx, vx2, c3o1, c1o3); +// forwardChimera(mfabc, mfbbc, mfcbc, vvx, vx2); +// forwardInverseChimeraWithK(mfacc, mfbcc, mfccc, vvx, vx2, c3o1, c1o9); + +// //////////////////////////////////////////////////////////////////////////////////// +// //! - Setting relaxation rates for non-hydrodynamic cumulants (default values). Variable names and equations +// //! according to <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017), +// //! DOI:10.1016/j.jcp.2017.05.040 ]</b></a> +// //! => [NAME IN PAPER]=[NAME IN CODE]=[DEFAULT VALUE]. +// //! - Trace of second order cumulants \f$ C_{200}+C_{020}+C_{002} \f$ used to adjust bulk +// //! viscosity:\f$\omega_2=OxxPyyPzz=1.0 \f$. +// //! - Third order cumulants \f$ C_{120}+C_{102}, C_{210}+C_{012}, C_{201}+C_{021} \f$: \f$ \omega_3=OxyyPxzz +// //! \f$ set according to Eq. (111) with simplifications assuming \f$ \omega_2=1.0\f$. +// //! - Third order cumulants \f$ C_{120}-C_{102}, C_{210}-C_{012}, C_{201}-C_{021} \f$: \f$ \omega_4 = OxyyMxzz +// //! \f$ set according to Eq. (112) with simplifications assuming \f$ \omega_2 = 1.0\f$. +// //! 
- Third order cumulants \f$ C_{111} \f$: \f$ \omega_5 = Oxyz \f$ set according to Eq. (113) with +// //! simplifications assuming \f$ \omega_2 = 1.0\f$ (modify for different bulk viscosity). +// //! - Fourth order cumulants \f$ C_{220}, C_{202}, C_{022}, C_{211}, C_{121}, C_{112} \f$: for simplification +// //! all set to the same default value \f$ \omega_6=\omega_7=\omega_8=O4=1.0 \f$. +// //! - Fifth order cumulants \f$ C_{221}, C_{212}, C_{122}\f$: \f$\omega_9=O5=1.0\f$. +// //! - Sixth order cumulant \f$ C_{222}\f$: \f$\omega_{10}=O6=1.0\f$. +// //! +// //////////////////////////////////////////////////////////////////////////////////// +// //! - Calculate modified omega with turbulent viscosity +// //! +// real omega = omega_in / (c1o1 + c3o1*omega_in*turbulentViscosity[k]); +// //////////////////////////////////////////////////////////// +// // 2. +// real OxxPyyPzz = c1o1; +// //////////////////////////////////////////////////////////// +// // 3. +// real OxyyPxzz = c8o1 * (-c2o1 + omega) * (c1o1 + c2o1 * omega) / (-c8o1 - c14o1 * omega + c7o1 * omega * omega); +// real OxyyMxzz = +// c8o1 * (-c2o1 + omega) * (-c7o1 + c4o1 * omega) / (c56o1 - c50o1 * omega + c9o1 * omega * omega); +// real Oxyz = c24o1 * (-c2o1 + omega) * (-c2o1 - c7o1 * omega + c3o1 * omega * omega) / +// (c48o1 + c152o1 * omega - c130o1 * omega * omega + c29o1 * omega * omega * omega); +// //////////////////////////////////////////////////////////// +// // 4. +// real O4 = c1o1; +// //////////////////////////////////////////////////////////// +// // 5. +// real O5 = c1o1; +// //////////////////////////////////////////////////////////// +// // 6. +// real O6 = c1o1; + +// //////////////////////////////////////////////////////////////////////////////////// +// //! - A and B: parameters for fourth order convergence of the diffusion term according to Eq. (114) and (115) +// //! <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017), +// //! 
DOI:10.1016/j.jcp.2017.05.040 ]</b></a> with simplifications assuming \f$ \omega_2 = 1.0 \f$ (modify for +// //! different bulk viscosity). +// //! +// real A = (c4o1 + c2o1 * omega - c3o1 * omega * omega) / (c2o1 - c7o1 * omega + c5o1 * omega * omega); +// real B = (c4o1 + c28o1 * omega - c14o1 * omega * omega) / (c6o1 - c21o1 * omega + c15o1 * omega * omega); + +// //////////////////////////////////////////////////////////////////////////////////// +// //! - Compute cumulants from central moments according to Eq. (20)-(23) in +// //! <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017), +// //! DOI:10.1016/j.jcp.2017.05.040 ]</b></a> +// //! +// //////////////////////////////////////////////////////////// +// // 4. +// real CUMcbb = mfcbb - ((mfcaa + c1o3) * mfabb + c2o1 * mfbba * mfbab) * OOrho; +// real CUMbcb = mfbcb - ((mfaca + c1o3) * mfbab + c2o1 * mfbba * mfabb) * OOrho; +// real CUMbbc = mfbbc - ((mfaac + c1o3) * mfbba + c2o1 * mfbab * mfabb) * OOrho; + +// real CUMcca = +// mfcca - (((mfcaa * mfaca + c2o1 * mfbba * mfbba) + c1o3 * (mfcaa + mfaca)) * OOrho - c1o9 * (drho * OOrho)); +// real CUMcac = +// mfcac - (((mfcaa * mfaac + c2o1 * mfbab * mfbab) + c1o3 * (mfcaa + mfaac)) * OOrho - c1o9 * (drho * OOrho)); +// real CUMacc = +// mfacc - (((mfaac * mfaca + c2o1 * mfabb * mfabb) + c1o3 * (mfaac + mfaca)) * OOrho - c1o9 * (drho * OOrho)); +// //////////////////////////////////////////////////////////// +// // 5. 
+// real CUMbcc = +// mfbcc - ((mfaac * mfbca + mfaca * mfbac + c4o1 * mfabb * mfbbb + c2o1 * (mfbab * mfacb + mfbba * mfabc)) + +// c1o3 * (mfbca + mfbac)) * +// OOrho; +// real CUMcbc = +// mfcbc - ((mfaac * mfcba + mfcaa * mfabc + c4o1 * mfbab * mfbbb + c2o1 * (mfabb * mfcab + mfbba * mfbac)) + +// c1o3 * (mfcba + mfabc)) * +// OOrho; +// real CUMccb = +// mfccb - ((mfcaa * mfacb + mfaca * mfcab + c4o1 * mfbba * mfbbb + c2o1 * (mfbab * mfbca + mfabb * mfcba)) + +// c1o3 * (mfacb + mfcab)) * +// OOrho; +// //////////////////////////////////////////////////////////// +// // 6. +// real CUMccc = mfccc + ((-c4o1 * mfbbb * mfbbb - (mfcaa * mfacc + mfaca * mfcac + mfaac * mfcca) - +// c4o1 * (mfabb * mfcbb + mfbab * mfbcb + mfbba * mfbbc) - +// c2o1 * (mfbca * mfbac + mfcba * mfabc + mfcab * mfacb)) * +// OOrho + +// (c4o1 * (mfbab * mfbab * mfaca + mfabb * mfabb * mfcaa + mfbba * mfbba * mfaac) + +// c2o1 * (mfcaa * mfaca * mfaac) + c16o1 * mfbba * mfbab * mfabb) * +// OOrho * OOrho - +// c1o3 * (mfacc + mfcac + mfcca) * OOrho - c1o9 * (mfcaa + mfaca + mfaac) * OOrho + +// (c2o1 * (mfbab * mfbab + mfabb * mfabb + mfbba * mfbba) + +// (mfaac * mfaca + mfaac * mfcaa + mfaca * mfcaa) + c1o3 * (mfaac + mfaca + mfcaa)) * +// OOrho * OOrho * c2o3 + +// c1o27 * ((drho * drho - drho) * OOrho * OOrho)); + +// //////////////////////////////////////////////////////////////////////////////////// +// //! - Compute linear combinations of second and third order cumulants +// //! +// //////////////////////////////////////////////////////////// +// // 2. +// real mxxPyyPzz = mfcaa + mfaca + mfaac; +// real mxxMyy = mfcaa - mfaca; +// real mxxMzz = mfcaa - mfaac; +// //////////////////////////////////////////////////////////// +// // 3. 
+// real mxxyPyzz = mfcba + mfabc; +// real mxxyMyzz = mfcba - mfabc; + +// real mxxzPyyz = mfcab + mfacb; +// real mxxzMyyz = mfcab - mfacb; + +// real mxyyPxzz = mfbca + mfbac; +// real mxyyMxzz = mfbca - mfbac; + +// //////////////////////////////////////////////////////////////////////////////////// +// // incl. correction +// //////////////////////////////////////////////////////////// +// //! - Compute velocity gradients from second order cumulants according to Eq. (27)-(32) +// //! <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017), +// //! DOI:10.1016/j.jcp.2017.05.040 ]</b></a> Further explanations of the correction in viscosity in Appendix H of +// //! <a href="https://doi.org/10.1016/j.camwa.2015.05.001"><b>[ M. Geier et al. (2015), +// //! DOI:10.1016/j.camwa.2015.05.001 ]</b></a> Note that the division by rho is omitted here as we need rho times +// //! the gradients later. +// //! +// real Dxy = -c3o1 * omega * mfbba; +// real Dxz = -c3o1 * omega * mfbab; +// real Dyz = -c3o1 * omega * mfabb; +// real dxux = c1o2 * (-omega) * (mxxMyy + mxxMzz) + c1o2 * OxxPyyPzz * (mfaaa - mxxPyyPzz); +// real dyuy = dxux + omega * c3o2 * mxxMyy; +// real dzuz = dxux + omega * c3o2 * mxxMzz; + +// //Smagorinsky for debugging +// // if(true) +// // { +// // if(false && k==99976) +// // { +// // printf("dudz+dwdx: \t %1.14f \n", Dxz ); +// // printf("dvdx+dudy: \t %1.14f \n", Dxy ); +// // printf("dwdy+dvdz: \t %1.14f \n", Dyz ); +// // printf("nu_t * dudz+dwdx: \t %1.14f \n", turbulentViscosity[k]*Dxz ); +// // printf("nu_t * dvdx+dudy: \t %1.14f \n", turbulentViscosity[k]*Dxy ); +// // printf("nu_t * dwdy+dvdz: \t %1.14f \n", turbulentViscosity[k]*Dyz ); +// // } +// // real Sbar = sqrt(c2o1*(dxux*dxux+dyuy*dyuy+dzuz*dzuz)+Dxy*Dxy+Dxz*Dxz+Dyz*Dyz); +// // real Cs = 0.08f; +// // turbulentViscosity[k] = Cs*Cs*Sbar; +// // } + +// //////////////////////////////////////////////////////////// +// //!
- Relaxation of second order cumulants with correction terms according to Eq. (33)-(35) in +// //! <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017), +// //! DOI:10.1016/j.jcp.2017.05.040 ]</b></a> +// //! +// mxxPyyPzz += +// OxxPyyPzz * (mfaaa - mxxPyyPzz) - c3o1 * (c1o1 - c1o2 * OxxPyyPzz) * (vx2 * dxux + vy2 * dyuy + vz2 * dzuz); +// mxxMyy += omega * (-mxxMyy) - c3o1 * (c1o1 + c1o2 * (-omega)) * (vx2 * dxux - vy2 * dyuy); +// mxxMzz += omega * (-mxxMzz) - c3o1 * (c1o1 + c1o2 * (-omega)) * (vx2 * dxux - vz2 * dzuz); + +// //////////////////////////////////////////////////////////////////////////////////// +// ////no correction +// // mxxPyyPzz += OxxPyyPzz*(mfaaa - mxxPyyPzz); +// // mxxMyy += -(-omega) * (-mxxMyy); +// // mxxMzz += -(-omega) * (-mxxMzz); +// ////////////////////////////////////////////////////////////////////////// +// mfabb += omega * (-mfabb); +// mfbab += omega * (-mfbab); +// mfbba += omega * (-mfbba); + +// //////////////////////////////////////////////////////////////////////////////////// +// // relax +// ////////////////////////////////////////////////////////////////////////// +// // incl. limiter +// //! - Relaxation of third order cumulants including limiter according to Eq. (116)-(123) +// //! <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017), +// //! DOI:10.1016/j.jcp.2017.05.040 ]</b></a> +// //! 
+// wadjust = Oxyz + (c1o1 - Oxyz) * abs(mfbbb) / (abs(mfbbb) + qudricLimitD); +// mfbbb += wadjust * (-mfbbb); +// wadjust = OxyyPxzz + (c1o1 - OxyyPxzz) * abs(mxxyPyzz) / (abs(mxxyPyzz) + qudricLimitP); +// mxxyPyzz += wadjust * (-mxxyPyzz); +// wadjust = OxyyMxzz + (c1o1 - OxyyMxzz) * abs(mxxyMyzz) / (abs(mxxyMyzz) + qudricLimitM); +// mxxyMyzz += wadjust * (-mxxyMyzz); +// wadjust = OxyyPxzz + (c1o1 - OxyyPxzz) * abs(mxxzPyyz) / (abs(mxxzPyyz) + qudricLimitP); +// mxxzPyyz += wadjust * (-mxxzPyyz); +// wadjust = OxyyMxzz + (c1o1 - OxyyMxzz) * abs(mxxzMyyz) / (abs(mxxzMyyz) + qudricLimitM); +// mxxzMyyz += wadjust * (-mxxzMyyz); +// wadjust = OxyyPxzz + (c1o1 - OxyyPxzz) * abs(mxyyPxzz) / (abs(mxyyPxzz) + qudricLimitP); +// mxyyPxzz += wadjust * (-mxyyPxzz); +// wadjust = OxyyMxzz + (c1o1 - OxyyMxzz) * abs(mxyyMxzz) / (abs(mxyyMxzz) + qudricLimitM); +// mxyyMxzz += wadjust * (-mxyyMxzz); +// ////////////////////////////////////////////////////////////////////////// +// // no limiter +// // mfbbb += OxyyMxzz * (-mfbbb); +// // mxxyPyzz += OxyyPxzz * (-mxxyPyzz); +// // mxxyMyzz += OxyyMxzz * (-mxxyMyzz); +// // mxxzPyyz += OxyyPxzz * (-mxxzPyyz); +// // mxxzMyyz += OxyyMxzz * (-mxxzMyyz); +// // mxyyPxzz += OxyyPxzz * (-mxyyPxzz); +// // mxyyMxzz += OxyyMxzz * (-mxyyMxzz); + +// //////////////////////////////////////////////////////////////////////////////////// +// //! - Compute inverse linear combinations of second and third order cumulants +// //! 
+// mfcaa = c1o3 * (mxxMyy + mxxMzz + mxxPyyPzz); +// mfaca = c1o3 * (-c2o1 * mxxMyy + mxxMzz + mxxPyyPzz); +// mfaac = c1o3 * (mxxMyy - c2o1 * mxxMzz + mxxPyyPzz); + +// mfcba = (mxxyMyzz + mxxyPyzz) * c1o2; +// mfabc = (-mxxyMyzz + mxxyPyzz) * c1o2; +// mfcab = (mxxzMyyz + mxxzPyyz) * c1o2; +// mfacb = (-mxxzMyyz + mxxzPyyz) * c1o2; +// mfbca = (mxyyMxzz + mxyyPxzz) * c1o2; +// mfbac = (-mxyyMxzz + mxyyPxzz) * c1o2; +// ////////////////////////////////////////////////////////////////////////// + +// ////////////////////////////////////////////////////////////////////////// +// // 4. +// // no limiter +// //! - Relax fourth order cumulants to modified equilibrium for fourth order convergence of diffusion according +// //! to Eq. (43)-(48) <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017), +// //! DOI:10.1016/j.jcp.2017.05.040 ]</b></a> +// //! +// CUMacc = -O4 * (c1o1 / omega - c1o2) * (dyuy + dzuz) * c2o3 * A + (c1o1 - O4) * (CUMacc); +// CUMcac = -O4 * (c1o1 / omega - c1o2) * (dxux + dzuz) * c2o3 * A + (c1o1 - O4) * (CUMcac); +// CUMcca = -O4 * (c1o1 / omega - c1o2) * (dyuy + dxux) * c2o3 * A + (c1o1 - O4) * (CUMcca); +// CUMbbc = -O4 * (c1o1 / omega - c1o2) * Dxy * c1o3 * B + (c1o1 - O4) * (CUMbbc); +// CUMbcb = -O4 * (c1o1 / omega - c1o2) * Dxz * c1o3 * B + (c1o1 - O4) * (CUMbcb); +// CUMcbb = -O4 * (c1o1 / omega - c1o2) * Dyz * c1o3 * B + (c1o1 - O4) * (CUMcbb); + +// ////////////////////////////////////////////////////////////////////////// +// // 5. +// CUMbcc += O5 * (-CUMbcc); +// CUMcbc += O5 * (-CUMcbc); +// CUMccb += O5 * (-CUMccb); + +// ////////////////////////////////////////////////////////////////////////// +// // 6. +// CUMccc += O6 * (-CUMccc); + +// //////////////////////////////////////////////////////////////////////////////////// +// //! - Compute central moments from post collision cumulants according to Eq. (53)-(56) in +// //! <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. 
(2017), +// //! DOI:10.1016/j.jcp.2017.05.040 ]</b></a> +// //! + +// ////////////////////////////////////////////////////////////////////////// +// // 4. +// mfcbb = CUMcbb + c1o3 * ((c3o1 * mfcaa + c1o1) * mfabb + c6o1 * mfbba * mfbab) * OOrho; +// mfbcb = CUMbcb + c1o3 * ((c3o1 * mfaca + c1o1) * mfbab + c6o1 * mfbba * mfabb) * OOrho; +// mfbbc = CUMbbc + c1o3 * ((c3o1 * mfaac + c1o1) * mfbba + c6o1 * mfbab * mfabb) * OOrho; + +// mfcca = +// CUMcca + +// (((mfcaa * mfaca + c2o1 * mfbba * mfbba) * c9o1 + c3o1 * (mfcaa + mfaca)) * OOrho - (drho * OOrho)) * c1o9; +// mfcac = +// CUMcac + +// (((mfcaa * mfaac + c2o1 * mfbab * mfbab) * c9o1 + c3o1 * (mfcaa + mfaac)) * OOrho - (drho * OOrho)) * c1o9; +// mfacc = +// CUMacc + +// (((mfaac * mfaca + c2o1 * mfabb * mfabb) * c9o1 + c3o1 * (mfaac + mfaca)) * OOrho - (drho * OOrho)) * c1o9; + +// ////////////////////////////////////////////////////////////////////////// +// // 5. +// mfbcc = CUMbcc + c1o3 * +// (c3o1 * (mfaac * mfbca + mfaca * mfbac + c4o1 * mfabb * mfbbb + +// c2o1 * (mfbab * mfacb + mfbba * mfabc)) + +// (mfbca + mfbac)) * +// OOrho; +// mfcbc = CUMcbc + c1o3 * +// (c3o1 * (mfaac * mfcba + mfcaa * mfabc + c4o1 * mfbab * mfbbb + +// c2o1 * (mfabb * mfcab + mfbba * mfbac)) + +// (mfcba + mfabc)) * +// OOrho; +// mfccb = CUMccb + c1o3 * +// (c3o1 * (mfcaa * mfacb + mfaca * mfcab + c4o1 * mfbba * mfbbb + +// c2o1 * (mfbab * mfbca + mfabb * mfcba)) + +// (mfacb + mfcab)) * +// OOrho; + +// ////////////////////////////////////////////////////////////////////////// +// // 6. 
+// mfccc = CUMccc - ((-c4o1 * mfbbb * mfbbb - (mfcaa * mfacc + mfaca * mfcac + mfaac * mfcca) - +// c4o1 * (mfabb * mfcbb + mfbab * mfbcb + mfbba * mfbbc) - +// c2o1 * (mfbca * mfbac + mfcba * mfabc + mfcab * mfacb)) * +// OOrho + +// (c4o1 * (mfbab * mfbab * mfaca + mfabb * mfabb * mfcaa + mfbba * mfbba * mfaac) + +// c2o1 * (mfcaa * mfaca * mfaac) + c16o1 * mfbba * mfbab * mfabb) * +// OOrho * OOrho - +// c1o3 * (mfacc + mfcac + mfcca) * OOrho - c1o9 * (mfcaa + mfaca + mfaac) * OOrho + +// (c2o1 * (mfbab * mfbab + mfabb * mfabb + mfbba * mfbba) + +// (mfaac * mfaca + mfaac * mfcaa + mfaca * mfcaa) + c1o3 * (mfaac + mfaca + mfcaa)) * +// OOrho * OOrho * c2o3 + +// c1o27 * ((drho * drho - drho) * OOrho * OOrho)); + +// //////////////////////////////////////////////////////////////////////////////////// +// //! - Add acceleration (body force) to first order cumulants according to Eq. (85)-(87) in +// //! <a href="https://doi.org/10.1016/j.camwa.2015.05.001"><b>[ M. Geier et al. (2015), +// //! DOI:10.1016/j.camwa.2015.05.001 ]</b></a> +// //! +// mfbaa = -mfbaa; +// mfaba = -mfaba; +// mfaab = -mfaab; + + +// //Write to array here to distribute read/write +// rho[k] = drho; +// vx[k] = vvx; +// vy[k] = vvy; +// vz[k] = vvz; + +// //////////////////////////////////////////////////////////////////////////////////// +// //! - Chimera transform from central moments to well conditioned distributions as defined in Appendix J in +// //! <a href="https://doi.org/10.1016/j.camwa.2015.05.001"><b>[ M. Geier et al. (2015), +// //! DOI:10.1016/j.camwa.2015.05.001 ]</b></a> see also Eq. (88)-(96) in <a +// //! href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017), DOI:10.1016/j.jcp.2017.05.040 +// //! ]</b></a> +// //! 
+// //////////////////////////////////////////////////////////////////////////////////// +// // X - Dir +// backwardInverseChimeraWithK(mfaaa, mfbaa, mfcaa, vvx, vx2, c1o1, c1o1); +// backwardChimera(mfaba, mfbba, mfcba, vvx, vx2); +// backwardInverseChimeraWithK(mfaca, mfbca, mfcca, vvx, vx2, c3o1, c1o3); +// backwardChimera(mfaab, mfbab, mfcab, vvx, vx2); +// backwardChimera(mfabb, mfbbb, mfcbb, vvx, vx2); +// backwardChimera(mfacb, mfbcb, mfccb, vvx, vx2); +// backwardInverseChimeraWithK(mfaac, mfbac, mfcac, vvx, vx2, c3o1, c1o3); +// backwardChimera(mfabc, mfbbc, mfcbc, vvx, vx2); +// backwardInverseChimeraWithK(mfacc, mfbcc, mfccc, vvx, vx2, c9o1, c1o9); + +// //////////////////////////////////////////////////////////////////////////////////// +// // Y - Dir +// backwardInverseChimeraWithK(mfaaa, mfaba, mfaca, vvy, vy2, c6o1, c1o6); +// backwardChimera(mfaab, mfabb, mfacb, vvy, vy2); +// backwardInverseChimeraWithK(mfaac, mfabc, mfacc, vvy, vy2, c18o1, c1o18); +// backwardInverseChimeraWithK(mfbaa, mfbba, mfbca, vvy, vy2, c3o2, c2o3); +// backwardChimera(mfbab, mfbbb, mfbcb, vvy, vy2); +// backwardInverseChimeraWithK(mfbac, mfbbc, mfbcc, vvy, vy2, c9o2, c2o9); +// backwardInverseChimeraWithK(mfcaa, mfcba, mfcca, vvy, vy2, c6o1, c1o6); +// backwardChimera(mfcab, mfcbb, mfccb, vvy, vy2); +// backwardInverseChimeraWithK(mfcac, mfcbc, mfccc, vvy, vy2, c18o1, c1o18); + +// //////////////////////////////////////////////////////////////////////////////////// +// // Z - Dir +// backwardInverseChimeraWithK(mfaaa, mfaab, mfaac, vvz, vz2, c36o1, c1o36); +// backwardInverseChimeraWithK(mfaba, mfabb, mfabc, vvz, vz2, c9o1, c1o9); +// backwardInverseChimeraWithK(mfaca, mfacb, mfacc, vvz, vz2, c36o1, c1o36); +// backwardInverseChimeraWithK(mfbaa, mfbab, mfbac, vvz, vz2, c9o1, c1o9); +// backwardInverseChimeraWithK(mfbba, mfbbb, mfbbc, vvz, vz2, c9o4, c4o9); +// backwardInverseChimeraWithK(mfbca, mfbcb, mfbcc, vvz, vz2, c9o1, c1o9); +// backwardInverseChimeraWithK(mfcaa, 
mfcab, mfcac, vvz, vz2, c36o1, c1o36); +// backwardInverseChimeraWithK(mfcba, mfcbb, mfcbc, vvz, vz2, c9o1, c1o9); +// backwardInverseChimeraWithK(mfcca, mfccb, mfccc, vvz, vz2, c36o1, c1o36); + +// //////////////////////////////////////////////////////////////////////////////////// +// //! - Write distributions: style of reading and writing the distributions from/to +// //! stored arrays dependent on timestep is based on the esoteric twist algorithm +// //! <a href="https://doi.org/10.3390/computation5020019"><b>[ M. Geier et al. (2017), +// //! DOI:10.3390/computation5020019 ]</b></a> +// //! + + +// distr_wrapper.distribution.f[dirE] = mfabb; +// distr_wrapper.distribution.f[dirW] = mfcbb; +// distr_wrapper.distribution.f[dirN] = mfbab; +// distr_wrapper.distribution.f[dirS] = mfbcb; +// distr_wrapper.distribution.f[dirT] = mfbba; +// distr_wrapper.distribution.f[dirB] = mfbbc; +// distr_wrapper.distribution.f[dirNE] = mfaab; +// distr_wrapper.distribution.f[dirSW] = mfccb; +// distr_wrapper.distribution.f[dirSE] = mfacb; +// distr_wrapper.distribution.f[dirNW] = mfcab; +// distr_wrapper.distribution.f[dirTE] = mfaba; +// distr_wrapper.distribution.f[dirBW] = mfcbc; +// distr_wrapper.distribution.f[dirBE] = mfabc; +// distr_wrapper.distribution.f[dirTW] = mfcba; +// distr_wrapper.distribution.f[dirTN] = mfbaa; +// distr_wrapper.distribution.f[dirBS] = mfbcc; +// distr_wrapper.distribution.f[dirBN] = mfbac; +// distr_wrapper.distribution.f[dirTS] = mfbca; +// distr_wrapper.distribution.f[dirZERO] = mfbbb; +// distr_wrapper.distribution.f[dirTNE] = mfaaa; +// distr_wrapper.distribution.f[dirTSW] = mfaca; +// distr_wrapper.distribution.f[dirTSE] = mfaac; +// distr_wrapper.distribution.f[dirTNW] = mfacc; +// distr_wrapper.distribution.f[dirBNE] = mfcaa; +// distr_wrapper.distribution.f[dirBSW] = mfcca; +// distr_wrapper.distribution.f[dirBSE] = mfcac; +// distr_wrapper.distribution.f[dirBNW] = mfccc; + +// distr_wrapper.write(); +// if(k==100000) +// { +// 
printf("mfcbb \t %f \t %f\t %f\t %f\t %f\t %f\t %f\t %f\t %f\t %f\t %f\t %f\t %f\t %f\t %f\t %f\t %f\t %f\t %f\t %f\t %f\t %f\t %f\t %f\t %f\t %f\t %f \n\n", +// (dist.f[dirE])[k] , +// (dist.f[dirN])[k] , +// (dist.f[dirS])[ks] , +// (dist.f[dirT])[k] , +// (dist.f[dirB])[kb] , +// (dist.f[dirNE])[k] , +// (dist.f[dirSW])[ksw] , +// (dist.f[dirSE])[ks] , +// (dist.f[dirNW])[kw] , +// (dist.f[dirW])[kw] , +// (dist.f[dirTE])[k] , +// (dist.f[dirBW])[kbw] , +// (dist.f[dirBE])[kb] , +// (dist.f[dirTW])[kw] , +// (dist.f[dirTN])[k] , +// (dist.f[dirBS])[kbs] , +// (dist.f[dirBN])[kb] , +// (dist.f[dirTS])[ks] , +// (dist.f[dirZERO])[k] , +// (dist.f[dirTNE])[k] , +// (dist.f[dirTSE])[ks] , +// (dist.f[dirBNE])[kb] , +// (dist.f[dirBSE])[kbs] , +// (dist.f[dirTNW])[kw] , +// (dist.f[dirTSW])[ksw] , +// (dist.f[dirBNW])[kbw] , +// (dist.f[dirBSW])[kbsw]); +// } + +// (dist.f[dirE])[k] = mfabb; +// (dist.f[dirW])[kw] = mfcbb; +// (dist.f[dirN])[k] = mfbab; +// (dist.f[dirS])[ks] = mfbcb; +// (dist.f[dirT])[k] = mfbba; +// (dist.f[dirB])[kb] = mfbbc; +// (dist.f[dirNE])[k] = mfaab; +// (dist.f[dirSW])[ksw] = mfccb; +// (dist.f[dirSE])[ks] = mfacb; +// (dist.f[dirNW])[kw] = mfcab; +// (dist.f[dirTE])[k] = mfaba; +// (dist.f[dirBW])[kbw] = mfcbc; +// (dist.f[dirBE])[kb] = mfabc; +// (dist.f[dirTW])[kw] = mfcba; +// (dist.f[dirTN])[k] = mfbaa; +// (dist.f[dirBS])[kbs] = mfbcc; +// (dist.f[dirBN])[kb] = mfbac; +// (dist.f[dirTS])[ks] = mfbca; +// (dist.f[dirZERO])[k] = mfbbb; +// (dist.f[dirTNE])[k] = mfaaa; +// (dist.f[dirTSE])[ks] = mfaca; +// (dist.f[dirBNE])[kb] = mfaac; +// (dist.f[dirBSE])[kbs] = mfacc; +// (dist.f[dirTNW])[kw] = mfcaa; +// (dist.f[dirTSW])[ksw] = mfcca; +// (dist.f[dirBNW])[kbw] = mfcac; +// (dist.f[dirBSW])[kbsw] = mfccc; + +// if(k==100000) +// { +// printf("mfcbb \t %f \t %f\t %f\t %f\t %f\t %f\t %f\t %f\t %f\t %f\t %f\t %f\t %f\t %f\t %f\t %f\t %f\t %f\t %f\t %f\t %f\t %f\t %f\t %f\t %f\t %f\t %f \n\n\n", +// (dist.f[dirE])[k] , +// 
(dist.f[dirN])[k] , +// (dist.f[dirS])[ks] , +// (dist.f[dirT])[k] , +// (dist.f[dirB])[kb] , +// (dist.f[dirNE])[k] , +// (dist.f[dirSW])[ksw] , +// (dist.f[dirSE])[ks] , +// (dist.f[dirNW])[kw] , +// (dist.f[dirW])[kw] , +// (dist.f[dirTE])[k] , +// (dist.f[dirBW])[kbw] , +// (dist.f[dirBE])[kb] , +// (dist.f[dirTW])[kw] , +// (dist.f[dirTN])[k] , +// (dist.f[dirBS])[kbs] , +// (dist.f[dirBN])[kb] , +// (dist.f[dirTS])[ks] , +// (dist.f[dirZERO])[k] , +// (dist.f[dirTNE])[k] , +// (dist.f[dirTSE])[ks] , +// (dist.f[dirBNE])[kb] , +// (dist.f[dirBSE])[kbs] , +// (dist.f[dirTNW])[kw] , +// (dist.f[dirTSW])[ksw] , +// (dist.f[dirBNW])[kbw] , +// (dist.f[dirBSW])[kbsw]); +// } +// } +// } \ No newline at end of file diff --git a/src/gpu/VirtualFluids_GPU/LBM/LB.h b/src/gpu/VirtualFluids_GPU/LBM/LB.h index 7424c473e7482ce1ad997a6241bbc1749e4a668f..17404fb959849663130c7de2a86764f198b9c32e 100644 --- a/src/gpu/VirtualFluids_GPU/LBM/LB.h +++ b/src/gpu/VirtualFluids_GPU/LBM/LB.h @@ -126,6 +126,7 @@ struct InitCondition bool isMeasurePoints {false}; bool isInitNeq {false}; bool isGeoNormal, isInflowNormal, isOutflowNormal; + bool hasWallModelMonitor {false}; bool simulatePorousMedia {false}; bool streetVelocityFile {false}; }; @@ -186,7 +187,10 @@ typedef struct QforBC{ real* q19[19]; int kQ=0; int kArray; - real *Vx, *Vy, *Vz, *deltaVz, *RhoBC; + real *Vx, *Vy, *Vz; + real *Vx1, *Vy1, *Vz1; + real *deltaVz, *RhoBC; + real *normalX, *normalY, *normalZ; }QforBoundaryConditions; //BCTemp @@ -213,6 +217,17 @@ typedef struct TempPressforBC{ int kTemp=0; }TempPressforBoundaryConditions; +// Settings for wall model used in StressBC +typedef struct WMparas{ + real* z0; + int* samplingOffset; + bool hasMonitor; + real* u_star; + real* Fx; + real* Fy; + real* Fz; +}WallModelParameters; + //measurePoints typedef struct MeasP{ std::string name; diff --git a/src/gpu/VirtualFluids_GPU/LBM/Simulation.cpp b/src/gpu/VirtualFluids_GPU/LBM/Simulation.cpp index 
3c845e24963061f4c61f50cd0084771bce9afae9..a16566ecb734f8c8c5d1e7d5f01df884e6464f6a 100644 --- a/src/gpu/VirtualFluids_GPU/LBM/Simulation.cpp +++ b/src/gpu/VirtualFluids_GPU/LBM/Simulation.cpp @@ -37,6 +37,8 @@ #include "Calculation/ForceCalculations.h" #include "Calculation/PorousMedia.h" ////////////////////////////////////////////////////////////////////////// +#include "Output/Timer.h" +////////////////////////////////////////////////////////////////////////// #include "Restart/RestartObject.h" ////////////////////////////////////////////////////////////////////////// #include "DataStructureInitializer/GridProvider.h" @@ -126,6 +128,11 @@ void Simulation::init(SPtr<Parameter> para, SPtr<GridProvider> gridProvider, std output << "vis_ratio: " << para->getViscosityRatio() << "\n"; output << "u0_ratio: " << para->getVelocityRatio() << "\n"; output << "delta_rho: " << para->getDensityRatio() << "\n"; + output << "QuadricLimiters: " << para->getQuadricLimitersHost()[0] << "\t" + << para->getQuadricLimitersHost()[1] << "\t" + << para->getQuadricLimitersHost()[2] << "\n"; + if(para->getUseAMD()) + output << "AMD SGS model: " << para->getSGSConstant() << "\n"; ////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////// @@ -379,14 +386,9 @@ void Simulation::bulk() void Simulation::run() { - double ftimeE, ftimeS, fnups, durchsatz; - float timerE, timerS; - timerE = 0.0f; - timerS = 0.0f; - ftimeE = 0.0f; - ftimeS = 0.0f; unsigned int t, t_prev; unsigned int t_MP = 0; + ////////////////////////////////////////////////////////////////////////// para->setStepEnsight(0); @@ -404,28 +406,21 @@ void Simulation::run() } ////////////////////////////////////////////////////////////////////////// - //Timer SDK - StopWatchInterface *sdkTimer = NULL; - sdkCreateTimer(&sdkTimer); - sdkStartTimer(&sdkTimer); - //Timer Event - cudaEvent_t start_t, stop_t; - checkCudaErrors( 
cudaEventCreate(&start_t)); - checkCudaErrors( cudaEventCreate(&stop_t)); - checkCudaErrors( cudaEventRecord(start_t)); - t_prev = para->getTimeCalcMedStart(); - output << "Processing time (ms) \t Nups in Mio \t Durchsatz in GB/sec\n"; + output << "getMaxLevel = " << para->getMaxLevel() << "\n"; + + Timer* averageTimer = new Timer("Average performance"); + averageTimer->startTimer(); - output << "getMaxLevel = " << para->getMaxLevel() << "\n"; //////////////////////////////////////////////////////////////////////////////// // Time loop //////////////////////////////////////////////////////////////////////////////// for(t=para->getTStart();t<=para->getTEnd();t++) { + updateGrid27(para.get(), communicator, cudaManager.get(), pm, 0, t, kernels); - + //////////////////////////////////////////////////////////////////////////////// //Particles //////////////////////////////////////////////////////////////////////////////// @@ -497,14 +492,8 @@ void Simulation::run() //////////////////////////////////////////////////////////////////////////////// if(para->getDoCheckPoint() && para->getTimeDoCheckPoint()>0 && t%para->getTimeDoCheckPoint()==0 && t>0 && !para->overWritingRestart(t)) { + averageTimer->stopTimer(); ////////////////////////////////////////////////////////////////////////// - //Timer SDK - sdkStopTimer(&sdkTimer); - sdkResetTimer(&sdkTimer); - ////////////////////////////////////////////////////////////////////////// - //Timer Event - checkCudaErrors( cudaEventRecord(stop_t)); - checkCudaErrors( cudaEventSynchronize(stop_t)); if( para->getDoCheckPoint() ) { @@ -523,11 +512,7 @@ void Simulation::run() output << "\n fertig\n"; } ////////////////////////////////////////////////////////////////////////// - //Timer SDK - sdkStartTimer(&sdkTimer); - ////////////////////////////////////////////////////////////////////////// - //Timer Event - checkCudaErrors( cudaEventRecord(start_t)); + averageTimer->startTimer(); } 
////////////////////////////////////////////////////////////////////////////// @@ -641,36 +626,10 @@ void Simulation::run() //else para->getParD(0)->evenOrOdd=true; ////////////////////////////////////////////////////////////////////////////////// - - ////////////////////////////////////////////////////////////////////////// - //Timer SDK - checkCudaErrors(cudaDeviceSynchronize()); - sdkStopTimer(&sdkTimer); - timerS = sdkGetTimerValue(&sdkTimer); - sdkResetTimer(&sdkTimer); - ftimeS += timerS; - fnups = 0.0; - durchsatz = 0.0; - for (int lev=para->getCoarse(); lev <= para->getFine(); lev++) - { - fnups += 1000.0 * (t-para->getTStart()) * para->getParH(lev)->size_Mat_SP * pow(2.,lev) / (ftimeS*1.0E6); - durchsatz += (27.0+1.0) * 4.0 * 1000.0 * (t-para->getTStart()) * para->getParH(lev)->size_Mat_SP / (ftimeS*1.0E9); - } - output << timerS << " / " << ftimeS << " \t " << fnups << " \t " << durchsatz << "\n"; - ////////////////////////////////////////////////////////////////////////// - //Timer Event - checkCudaErrors( cudaEventRecord(stop_t)); - checkCudaErrors( cudaEventSynchronize(stop_t)); - checkCudaErrors( cudaEventElapsedTime( &timerE, start_t, stop_t)); - ftimeE += timerE; - fnups = 0.0; - durchsatz = 0.0; - for (int lev=para->getCoarse(); lev <= para->getFine(); lev++) - { - fnups += 1000.0 * (t-para->getTStart()) * para->getParH(lev)->size_Mat_SP * pow(2.,lev) / (ftimeE*1.0E6); - durchsatz += (27.0+1.0) * 4.0 * 1000.0 * (t-para->getTStart()) * para->getParH(lev)->size_Mat_SP / (ftimeE*1.0E9); - } - output << timerE << " / " << ftimeE << " \t " << fnups << " \t " << durchsatz << "\n"; + ////////////////////////////////////////////////////////////////////////// + averageTimer->stopTimer(); + averageTimer->outputPerformance(t, para.get()); + ////////////////////////////////////////////////////////////////////////// if( para->getPrintFiles() ) { @@ -942,44 +901,11 @@ void Simulation::run() output << "done.\n"; 
//////////////////////////////////////////////////////////////////////// } - sdkStartTimer(&sdkTimer); - checkCudaErrors( cudaEventRecord(start_t)); - } - } - - ////////////////////////////////////////////////////////////////////////// - //Timer SDK - sdkStopTimer(&sdkTimer); - timerS = sdkGetTimerValue(&sdkTimer); - ftimeS += timerS; - fnups = 0.0; - durchsatz = 0.0; - for (int lev=para->getCoarse(); lev <= para->getFine(); lev++) - { - fnups += 1000.0 * (t-para->getTStart()) * para->getParH(lev)->size_Mat_SP * pow(2.,lev) / (ftimeS*1.0E6); - durchsatz += (27.0+1.0) * 4.0 * 1000.0 * (t-para->getTStart()) * para->getParH(lev)->size_Mat_SP / (ftimeS*1.0E9); + //////////////////////////////////////////////////////////////////////// + averageTimer->startTimer(); + } } - output << "Processing time: " << ftimeS << "(ms)\n"; - output << "Nups in Mio: " << fnups << "\n"; - output << "Durchsatz in GB/sec: " << durchsatz << "\n"; - ////////////////////////////////////////////////////////////////////////// - //Timer Event - checkCudaErrors( cudaEventRecord(stop_t)); - checkCudaErrors( cudaEventSynchronize(stop_t)); - checkCudaErrors( cudaEventElapsedTime( &timerE, start_t, stop_t )); - ftimeE += timerE; - fnups = 0.0; - durchsatz = 0.0; - for (int lev=para->getCoarse(); lev <= para->getFine(); lev++) - { - fnups += 1000.0 * (t-para->getTStart()) * para->getParH(lev)->size_Mat_SP * pow(2.,lev) / (ftimeE*1.0E6); - durchsatz += (27.0+1.0) * 4.0 * 1000.0 * (t-para->getTStart()) * para->getParH(lev)->size_Mat_SP / (ftimeE*1.0E9); - } - output << "Processing time: " << ftimeE << "(ms)\n"; - output << "Nups in Mio: " << fnups << "\n"; - output << "Durchsatz in GB/sec: " << durchsatz << "\n"; - ////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////// //printDragLift(para); @@ -1019,11 +945,7 @@ void Simulation::run() // MeasurePointWriter::writeMeasurePoints(para, lev, j, 0); // } //} 
- // ////////////////////////////////////////////////////////////////////////// - - checkCudaErrors(cudaEventDestroy(start_t)); - checkCudaErrors(cudaEventDestroy(stop_t)); - sdkDeleteTimer(&sdkTimer); + // ////////////////////////////////////////////////////////////////////////// } void Simulation::porousMedia() diff --git a/src/gpu/VirtualFluids_GPU/Output/TimeStepTimer.cpp b/src/gpu/VirtualFluids_GPU/Output/TimeStepTimer.cpp new file mode 100644 index 0000000000000000000000000000000000000000..78da4947d0c8196cda49fef754a3f44fc39d0a44 --- /dev/null +++ b/src/gpu/VirtualFluids_GPU/Output/TimeStepTimer.cpp @@ -0,0 +1,64 @@ +#include "helper_cuda.h" +#include <cuda_runtime.h> +#include "Core/DataTypes.h" +#include "UbScheduler.h" +#include "Parameter/Parameter.h" + +#include "Timer.h" +#include "TimeStepTimer.h" + +void TimeStepTimer::startTotalTimer (uint t){ if(t%this->tActivate==0) this->totalTimer->startTimer(); } +void TimeStepTimer::stopTotalTimer (uint t){ if(t%this->tActivate==0) this->totalTimer->stopTimer(); } +void TimeStepTimer::startCollisionTimer (uint t){ if(t%this->tActivate==0) this->collisionTimer->startTimer(); } +void TimeStepTimer::stopCollisionTimer (uint t){ if(t%this->tActivate==0) this->collisionTimer->stopTimer(); } +void TimeStepTimer::startPostCollisionBCTimer (uint t){ if(t%this->tActivate==0) this->postCollisionBCTimer->startTimer(); } +void TimeStepTimer::stopPostCollisionBCTimer (uint t){ if(t%this->tActivate==0) this->postCollisionBCTimer->stopTimer(); } +void TimeStepTimer::startPreCollisionBCTimer (uint t){ if(t%this->tActivate==0) this->preCollisionBCTimer->startTimer(); } +void TimeStepTimer::stopPreCollisionBCTimer (uint t){ if(t%this->tActivate==0) this->preCollisionBCTimer->stopTimer(); } +void TimeStepTimer::startEddyViscosityTimer (uint t){ if(t%this->tActivate==0) this->eddyViscosityTimer->startTimer(); } +void TimeStepTimer::stopEddyViscosityTimer (uint t){ if(t%this->tActivate==0) this->eddyViscosityTimer->stopTimer(); } 
+void TimeStepTimer::startActuatorTimer (uint t){ if(t%this->tActivate==0) this->actuatorTimer->startTimer(); } +void TimeStepTimer::stopActuatorTimer (uint t){ if(t%this->tActivate==0) this->actuatorTimer->stopTimer(); } +void TimeStepTimer::startProbeTimer (uint t){ if(t%this->tActivate==0) this->probeTimer->startTimer(); } +void TimeStepTimer::stopProbeTimer (uint t){ if(t%this->tActivate==0) this->probeTimer->stopTimer(); } +void TimeStepTimer::startExchangeTimer (uint t){ if(t%this->tActivate==0) this->exchangeTimer->startTimer(); } +void TimeStepTimer::stopExchangeTimer (uint t){ if(t%this->tActivate==0) this->exchangeTimer->stopTimer(); } + + +void TimeStepTimer::resetTimers(uint t) +{ + if(t%this->tActivate==0) + { + this->totalTimer->resetTimer(); + this->collisionTimer->resetTimer(); + this->postCollisionBCTimer->resetTimer(); + this->preCollisionBCTimer->resetTimer(); + this->eddyViscosityTimer->resetTimer(); + this->actuatorTimer->resetTimer(); + this->probeTimer->resetTimer(); + } +} + +void TimeStepTimer::outputPerformance(uint t, Parameter* para) +{ + if(t%this->tActivate==0) + { + + float tCollision = this->collisionTimer->getTotalElapsedTime(); + float tPostCollisionBC = this->postCollisionBCTimer->getTotalElapsedTime(); + float tPreCollisionBC = this->preCollisionBCTimer->getTotalElapsedTime(); + float tEddyViscosity = this->eddyViscosityTimer->getTotalElapsedTime(); + float tAcutator = this->actuatorTimer->getTotalElapsedTime(); + float tProbe = this->probeTimer->getTotalElapsedTime(); + float tExchange = this->exchangeTimer->getTotalElapsedTime(); + float tTotal = tCollision+tPostCollisionBC+tPreCollisionBC+tEddyViscosity+tAcutator+tProbe+tExchange; + + VF_LOG_INFO(" --- Collision \t {}%", (tCollision/tTotal)*100 ); + VF_LOG_INFO(" --- PostCollisionBCs \t {}%", (tPostCollisionBC/tTotal)*100 ); + VF_LOG_INFO(" --- PreCollisionBCs \t {}%", (tPreCollisionBC/tTotal)*100 ); + VF_LOG_INFO(" --- Eddy viscosity \t {}%", (tEddyViscosity/tTotal)*100 ); + 
VF_LOG_INFO(" --- Actuators \t {}%", (tAcutator/tTotal)*100 ); + VF_LOG_INFO(" --- Probes \t\t {}%", (tProbe/tTotal)*100 ); + VF_LOG_INFO(" --- Data exchange \t {}%", (tExchange/tTotal)*100 ); + } +} \ No newline at end of file diff --git a/src/gpu/VirtualFluids_GPU/Output/TimeStepTimer.h b/src/gpu/VirtualFluids_GPU/Output/TimeStepTimer.h new file mode 100644 index 0000000000000000000000000000000000000000..982d1ce56bfadb7eddfd3d34d8d6b01ac6f92233 --- /dev/null +++ b/src/gpu/VirtualFluids_GPU/Output/TimeStepTimer.h @@ -0,0 +1,59 @@ +#ifndef TIMESTEPTIMER_H +#define TIMESTEPTIMER_H + +#include "helper_cuda.h" +#include <cuda_runtime.h> +#include "Core/DataTypes.h" +#include "UbScheduler.h" +#include "Parameter/Parameter.h" + +#include "Timer.h" + +class TimeStepTimer +{ + public: + TimeStepTimer(std::string _name, uint _tActivate): name(_name), tActivate(_tActivate) + { + + }; + + ~TimeStepTimer(){}; + + void startTotalTimer (uint t); + void stopTotalTimer (uint t); + void startCollisionTimer (uint t); + void stopCollisionTimer (uint t); + void startPostCollisionBCTimer (uint t); + void stopPostCollisionBCTimer (uint t); + void startPreCollisionBCTimer (uint t); + void stopPreCollisionBCTimer (uint t); + void startEddyViscosityTimer (uint t); + void stopEddyViscosityTimer (uint t); + void startActuatorTimer (uint t); + void stopActuatorTimer (uint t); + void startProbeTimer (uint t); + void stopProbeTimer (uint t); + void startExchangeTimer (uint t); + void stopExchangeTimer (uint t); + + void resetTimers(uint t); + void outputPerformance(uint t, Parameter* para); + + private: + + Timer* totalTimer = new Timer("total"); + Timer* collisionTimer = new Timer("collision"); + Timer* postCollisionBCTimer = new Timer("postCollisionBC"); + Timer* preCollisionBCTimer = new Timer("preCollisionBC"); + Timer* eddyViscosityTimer = new Timer("eddyViscosity"); + Timer* actuatorTimer = new Timer("actuator"); + Timer* probeTimer = new Timer("probes"); + Timer* exchangeTimer = new 
Timer("exchange"); + + std::string name; + uint tActivate; +}; + + + +#endif \ No newline at end of file diff --git a/src/gpu/VirtualFluids_GPU/Output/Timer.cpp b/src/gpu/VirtualFluids_GPU/Output/Timer.cpp new file mode 100644 index 0000000000000000000000000000000000000000..823364a22eca41517816c1fdb61dfdc96ef1d961 --- /dev/null +++ b/src/gpu/VirtualFluids_GPU/Output/Timer.cpp @@ -0,0 +1,51 @@ + +#include <iostream> +#include <cuda_runtime.h> +#include "UbScheduler.h" +#include "Timer.h" + + +void Timer::initTimer() +{ + cudaEventCreate(&this->start_t); + cudaEventCreate(&this->stop_t ); +} + +void Timer::startTimer() +{ + checkCudaErrors(cudaEventRecord(this->start_t)); +} + +void Timer::stopTimer() +{ + checkCudaErrors(cudaEventRecord(this->stop_t)); + checkCudaErrors(cudaEventSynchronize(this->stop_t)); + checkCudaErrors(cudaEventElapsedTime(&this->elapsedTime, this->start_t, this->stop_t)); + this->totalElapsedTime += this->elapsedTime; +} + +void Timer::resetTimer() +{ + this->elapsedTime = 0.0; + this->totalElapsedTime = 0.0; +} + +void Timer::outputPerformance(uint t, Parameter* para) +{ + real fnups = 0.0; + real bandwidth = 0.0; + + for (int lev=para->getCoarse(); lev <= para->getFine(); lev++) + { + fnups += 1000.0 * (t-para->getTStart()) * para->getParH(lev)->size_Mat_SP * pow(2.,lev) / (this->totalElapsedTime*1.0E6); + bandwidth += (27.0+1.0) * 4.0 * 1000.0 * (t-para->getTStart()) * para->getParH(lev)->size_Mat_SP / (this->totalElapsedTime*1.0E9); + } + + if(this->firstOutput) + { + VF_LOG_INFO(" --- {} --- Processing time (ms) \t Nups in Mio \t Bandwidth in GB/sec", this->name ); + this->firstOutput = false; + } + + VF_LOG_INFO(" --- {} --- {}/{} \t {} \t {}", this->name, this->elapsedTime, this->totalElapsedTime, fnups, bandwidth ); +} \ No newline at end of file diff --git a/src/gpu/VirtualFluids_GPU/Output/Timer.h b/src/gpu/VirtualFluids_GPU/Output/Timer.h new file mode 100644 index 
0000000000000000000000000000000000000000..6432b347458e68a5089aea3de625017d6facd34b --- /dev/null +++ b/src/gpu/VirtualFluids_GPU/Output/Timer.h @@ -0,0 +1,47 @@ +#ifndef TIMER_H +#define TIMER_H + +#include "helper_cuda.h" +#include <cuda_runtime.h> +#include "Core/DataTypes.h" + +#include "UbScheduler.h" +#include "logger/Logger.h" +#include "Parameter/Parameter.h" + +class Timer +{ + public: + Timer(std::string _name): name(_name) + { + this->initTimer(); + }; + + ~Timer() + { + cudaEventDestroy(this->start_t); + cudaEventDestroy(this->stop_t); + }; + + void initTimer(); + void startTimer(); + void stopTimer(); + void resetTimer(); + void outputPerformance(uint t, Parameter* para); + + float getElapsedTime(){ return this->elapsedTime; } + float getTotalElapsedTime(){ return this->totalElapsedTime; } + + private: + + cudaEvent_t start_t, stop_t; + float elapsedTime = 0.0; + float totalElapsedTime = 0.0; + std::string name; + + bool firstOutput = true; +}; + + + +#endif \ No newline at end of file diff --git a/src/gpu/VirtualFluids_GPU/Parameter/Parameter.cpp b/src/gpu/VirtualFluids_GPU/Parameter/Parameter.cpp index b29db903ad0ad5e4ea9c18e36f152d81c7b952c6..c10b5b690bf8aa2c819b26acf1509f337debafe3 100644 --- a/src/gpu/VirtualFluids_GPU/Parameter/Parameter.cpp +++ b/src/gpu/VirtualFluids_GPU/Parameter/Parameter.cpp @@ -109,12 +109,15 @@ void Parameter::readConfigData(const vf::basics::ConfigurationFile &configData) ////////////////////////////////////////////////////////////////////////// if (configData.contains("UseMeasurePoints")) this->setUseMeasurePoints(configData.getValue<bool>("UseMeasurePoints")); - ////////////////////////////////////////////////////////////////////////// + ////////////////////////////////////////////////////////////////////////// if (configData.contains("UseWale")) this->setUseWale(configData.getValue<bool>("UseWale")); ////////////////////////////////////////////////////////////////////////// if (configData.contains("UseAMD")) 
this->setUseAMD(configData.getValue<bool>("UseAMD")); + ////////////////////////////////////////////////////////////////////////// + if (configData.contains("SGSconstant")) + this->setSGSConstant(configData.getValue<real>("SGSconstant")); ////////////////////////////////////////////////////////////////////////// if (configData.contains("UseInitNeq")) this->setUseInitNeq(configData.getValue<bool>("UseInitNeq")); @@ -572,11 +575,12 @@ void Parameter::setForcing(real forcingX, real forcingY, real forcingZ) this->hostForcing[2] = forcingZ; } void Parameter::setQuadricLimiters(real quadricLimiterP, real quadricLimiterM, real quadricLimiterD) -{ +{ this->hostQuadricLimiters[0] = quadricLimiterP; this->hostQuadricLimiters[1] = quadricLimiterM; this->hostQuadricLimiters[2] = quadricLimiterD; } + void Parameter::setPhi(real inPhi) { Phi = inPhi; @@ -866,6 +870,10 @@ void Parameter::setSGSConstant(real SGSConstant) { ic.SGSConstant = SGSConstant; } +void Parameter::setHasWallModelMonitor(bool hasWallModelMonitor) +{ + ic.hasWallModelMonitor = hasWallModelMonitor; +} void Parameter::setUseInitNeq(bool useInitNeq) { ic.isInitNeq = useInitNeq; @@ -2261,6 +2269,10 @@ real Parameter::getSGSConstant() { return ic.SGSConstant; } +bool Parameter::getHasWallModelMonitor() +{ + return ic.hasWallModelMonitor; +} bool Parameter::getUseInitNeq() { return ic.isInitNeq; diff --git a/src/gpu/VirtualFluids_GPU/Parameter/Parameter.h b/src/gpu/VirtualFluids_GPU/Parameter/Parameter.h index ea9b42f94e63a36c2fd0b6b669e959a8b4810e8f..48cf410ff8b700ef69d26883c5ef22048f9fd322 100644 --- a/src/gpu/VirtualFluids_GPU/Parameter/Parameter.h +++ b/src/gpu/VirtualFluids_GPU/Parameter/Parameter.h @@ -189,9 +189,9 @@ struct LBMSimulationParameter unsigned int mem_size_kFC_off; // BC's//////////////////// - QforBoundaryConditions QWall, Qinflow, Qoutflow, QSlip; - unsigned int kQ = 0, kInflowQ = 0, kOutflowQ = 0, kSlipQ = 0; - unsigned int kQread, kInflowQread, kOutflowQread, kSlipQread; + 
QforBoundaryConditions QWall, Qinflow, Qoutflow, QSlip, QStress; + unsigned int kQ = 0, kInflowQ = 0, kOutflowQ = 0, kSlipQ = 0, kStressQ = 0; + unsigned int kQread, kInflowQread, kOutflowQread, kSlipQread, kStressQread; QforBoundaryConditions QpressX0, QpressX1, QpressY0, QpressY1, QpressZ0, QpressZ1; QforBoundaryConditions QPropeller; @@ -203,6 +203,9 @@ struct LBMSimulationParameter QforBoundaryConditions QInlet, QOutlet, QPeriodic; unsigned int kInletQread, kOutletQread; unsigned int kPressQ = 0, kPressQread; + + WallModelParameters wallModel; + // testRoundoffError Distributions27 kDistTestRE; @@ -453,6 +456,7 @@ public: void setUseTurbulentViscosity(bool useTurbulentViscosity); void setUseAMD( bool useAMD); void setSGSConstant( real SGSConstant); + void setHasWallModelMonitor(bool hasWallModelMonitor); void setUseInitNeq(bool useInitNeq); void setSimulatePorousMedia(bool simulatePorousMedia); void setIsF3(bool isF3); @@ -714,6 +718,7 @@ public: bool getUseTurbulentViscosity(); bool getUseAMD(); real getSGSConstant(); + bool getHasWallModelMonitor(); bool getUseInitNeq(); bool getSimulatePorousMedia(); bool getIsF3(); diff --git a/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/PlanarAverageProbe.cu b/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/PlanarAverageProbe.cu new file mode 100644 index 0000000000000000000000000000000000000000..5ac087ccfec2dc71439054921c8500568c9c070d --- /dev/null +++ b/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/PlanarAverageProbe.cu @@ -0,0 +1,457 @@ +#include "Probe.h" +#include "PlanarAverageProbe.h" + +#include <cuda/CudaGrid.h> + +#include <cuda.h> +#include <cuda_runtime.h> +#include <helper_cuda.h> + +#include <thrust/device_vector.h> +#include <thrust/reduce.h> +#include <thrust/device_ptr.h> +#include <thrust/inner_product.h> + +#include "Parameter/Parameter.h" +#include "DataStructureInitializer/GridProvider.h" +#include "GPU/CudaMemoryManager.h" + +#include <algorithm> + 
+/////////////////////////////////////////////////////////////////////////////////// +/// Functors for thrust reductions +/////////////////////////////////////////////////////////////////////////////////// + +template<typename T> +struct pow2 : public thrust::unary_function<T,T> +{ + __host__ __device__ T operator()(const T &x) const + { + return x * x; + } +}; + +template<typename T> +struct pow3 : public thrust::unary_function<T,T> +{ + __host__ __device__ T operator()(const T &x) const + { + return x * x * x; + } +}; + +template<typename T> +struct pow4 : public thrust::unary_function<T,T> +{ + __host__ __device__ T operator()(const T &x) const + { + return x * x * x * x; + } +}; + +struct nth_moment +{ + const float mean; + const int n; + + nth_moment(float _mean, int _n) : mean(_mean), n(_n) {} + + __host__ __device__ + float operator()(const float& x) const { + + real fluctuation = x-mean; + real moment = fluctuation; + for(int i = 1; i<n; i++) moment *= fluctuation; + + return moment; + } +}; + + +/////////////////////////////////////////////////////////////////////////////////// + +__global__ void moveIndicesInPosNormalDir( uint* pointIndices, uint nPoints, uint* neighborNormal, real* coordsX, real* coordsY, real* coordsZ ) +{ + const uint x = threadIdx.x; + const uint y = blockIdx.x; + const uint z = blockIdx.y; + + const uint nx = blockDim.x; + const uint ny = gridDim.x; + + const uint node = nx*(ny*z + y) + x; + + if(node>=nPoints) return; + + uint k = pointIndices[node]; + + pointIndices[node] = neighborNormal[k]; +} + +__global__ void moveIndicesInNegNormalDir( uint* pointIndices, uint nPoints, uint* neighborWSB, uint* neighborInplane1, uint* neighborInplane2, real* coordsX, real* coordsY, real* coordsZ ) +{ + const uint x = threadIdx.x; + const uint y = blockIdx.x; + const uint z = blockIdx.y; + + const uint nx = blockDim.x; + const uint ny = gridDim.x; + + const uint node = nx*(ny*z + y) + x; + + if(node>=nPoints) return; + + uint k = 
pointIndices[node]; + + pointIndices[node] = neighborWSB[neighborInplane1[neighborInplane2[k]]]; +} + +/////////////////////////////////////////////////////////////////////////////////// + +bool PlanarAverageProbe::isAvailableStatistic(Statistic _variable) +{ + bool isAvailable; + + switch (_variable) + { + case Statistic::Instantaneous: + case Statistic::Means: + case Statistic::Variances: + isAvailable = false; + break; + case Statistic::SpatialMeans: + case Statistic::SpatioTemporalMeans: + case Statistic::SpatialCovariances: + case Statistic::SpatioTemporalCovariances: + case Statistic::SpatialSkewness: + case Statistic::SpatioTemporalSkewness: + case Statistic::SpatialFlatness: + case Statistic::SpatioTemporalFlatness: + isAvailable = true; + break; + default: + isAvailable = false; + } + return isAvailable; +} + +/////////////////////////////////////////////////////////////////////////////////// +std::vector<PostProcessingVariable> PlanarAverageProbe::getPostProcessingVariables(Statistic statistic) +{ + std::vector<PostProcessingVariable> postProcessingVariables; + switch (statistic) + { + case Statistic::SpatialMeans: + postProcessingVariables.push_back( PostProcessingVariable("vx_spatMean", velocityRatio) ); + postProcessingVariables.push_back( PostProcessingVariable("vy_spatMean", this->velocityRatio) ); + postProcessingVariables.push_back( PostProcessingVariable("vz_spatMean", this->velocityRatio) ); + break; + case Statistic::SpatioTemporalMeans: + postProcessingVariables.push_back( PostProcessingVariable("vx_spatTmpMean", this->velocityRatio) ); + postProcessingVariables.push_back( PostProcessingVariable("vy_spatTmpMean", this->velocityRatio) ); + postProcessingVariables.push_back( PostProcessingVariable("vz_spatTmpMean", this->velocityRatio) ); + break; + case Statistic::SpatialCovariances: + postProcessingVariables.push_back( PostProcessingVariable("vxvx_spatMean", pow(this->velocityRatio, 2.0)) ); + postProcessingVariables.push_back( 
PostProcessingVariable("vyvy_spatMean", pow(this->velocityRatio, 2.0)) ); + postProcessingVariables.push_back( PostProcessingVariable("vzvz_spatMean", pow(this->velocityRatio, 2.0)) ); + postProcessingVariables.push_back( PostProcessingVariable("vxvy_spatMean", pow(this->velocityRatio, 2.0)) ); + postProcessingVariables.push_back( PostProcessingVariable("vxvz_spatMean", pow(this->velocityRatio, 2.0)) ); + postProcessingVariables.push_back( PostProcessingVariable("vyvz_spatMean", pow(this->velocityRatio, 2.0)) ); + break; + case Statistic::SpatioTemporalCovariances: + postProcessingVariables.push_back( PostProcessingVariable("vxvx_spatTmpMean", pow(this->velocityRatio, 2.0)) ); + postProcessingVariables.push_back( PostProcessingVariable("vyvy_spatTmpMean", pow(this->velocityRatio, 2.0)) ); + postProcessingVariables.push_back( PostProcessingVariable("vzvz_spatTmpMean", pow(this->velocityRatio, 2.0)) ); + postProcessingVariables.push_back( PostProcessingVariable("vxvy_spatTmpMean", pow(this->velocityRatio, 2.0)) ); + postProcessingVariables.push_back( PostProcessingVariable("vxvz_spatTmpMean", pow(this->velocityRatio, 2.0)) ); + postProcessingVariables.push_back( PostProcessingVariable("vyvz_spatTmpMean", pow(this->velocityRatio, 2.0)) ); + break; + case Statistic::SpatialSkewness: + postProcessingVariables.push_back( PostProcessingVariable("Sx_spatMean", 1.0) ); + postProcessingVariables.push_back( PostProcessingVariable("Sy_spatMean", 1.0) ); + postProcessingVariables.push_back( PostProcessingVariable("Sz_spatMean", 1.0) ); + break; + case Statistic::SpatioTemporalSkewness: + postProcessingVariables.push_back( PostProcessingVariable("Sx_spatTmpMean", 1.0) ); + postProcessingVariables.push_back( PostProcessingVariable("Sy_spatTmpMean", 1.0) ); + postProcessingVariables.push_back( PostProcessingVariable("Sz_spatTmpMean", 1.0) ); + break; + case Statistic::SpatialFlatness: + postProcessingVariables.push_back( PostProcessingVariable("Fx_spatMean", 1.0) ); + 
postProcessingVariables.push_back( PostProcessingVariable("Fy_spatMean", 1.0) ); + postProcessingVariables.push_back( PostProcessingVariable("Fz_spatMean", 1.0) ); + break; + case Statistic::SpatioTemporalFlatness: + postProcessingVariables.push_back( PostProcessingVariable("Fx_spatTmpMean", 1.0) ); + postProcessingVariables.push_back( PostProcessingVariable("Fy_spatTmpMean", 1.0) ); + postProcessingVariables.push_back( PostProcessingVariable("Fz_spatTmpMean", 1.0) ); + break; + + default: + printf("Statistic unavailable in PlanarAverageProbe\n"); + assert(false); + break; + } + return postProcessingVariables; +} + +/////////////////////////////////////////////////////////////////////////////////// + +void PlanarAverageProbe::findPoints(Parameter* para, GridProvider* gridProvider, std::vector<int>& probeIndices_level, + std::vector<real>& distX_level, std::vector<real>& distY_level, std::vector<real>& distZ_level, + std::vector<real>& pointCoordsX_level, std::vector<real>& pointCoordsY_level, std::vector<real>& pointCoordsZ_level, + int level) +{ + real dx = abs(para->getParH(level)->coordX_SP[1]-para->getParH(level)->coordX_SP[para->getParH(level)->neighborX_SP[1]]); + + real /* *pointCoordsInplane1_par, *pointCoordsInplane2_par,*/ *pointCoordsNormal_par; + std::vector<real> *pointCoordsInplane1, *pointCoordsInplane2, *pointCoordsNormal; + + if(this->planeNormal == 'x'){ + pointCoordsNormal = &pointCoordsX_level; + pointCoordsInplane1 = &pointCoordsY_level; + pointCoordsInplane2 = &pointCoordsZ_level; + pointCoordsNormal_par = para->getParH(level)->coordX_SP; + // pointCoordsInplane1_par = para->getParH(level)->coordY_SP; + // pointCoordsInplane2_par = para->getParH(level)->coordZ_SP; + } + if(this->planeNormal == 'y'){ + pointCoordsNormal = &pointCoordsY_level; + pointCoordsInplane1 = &pointCoordsX_level; + pointCoordsInplane2 = &pointCoordsZ_level; + pointCoordsNormal_par = para->getParH(level)->coordY_SP; + // pointCoordsInplane1_par = 
para->getParH(level)->coordX_SP; + // pointCoordsInplane2_par = para->getParH(level)->coordZ_SP; + } + if(this->planeNormal == 'z'){ + pointCoordsNormal = &pointCoordsZ_level; + pointCoordsInplane1 = &pointCoordsX_level; + pointCoordsInplane2 = &pointCoordsY_level; + pointCoordsNormal_par = para->getParH(level)->coordZ_SP; + // pointCoordsInplane1_par = para->getParH(level)->coordX_SP; + // pointCoordsInplane2_par = para->getParH(level)->coordY_SP; + } + + // Find all points along the normal direction + for(uint j=1; j<para->getParH(level)->size_Mat_SP; j++ ) + { + if(para->getParH(level)->geoSP[j] == GEO_FLUID) + { + if( std::find(pointCoordsNormal->begin(), pointCoordsNormal->end(), pointCoordsNormal_par[j]) == pointCoordsNormal->end()) + { + pointCoordsNormal->push_back( pointCoordsNormal_par[j] ); + pointCoordsInplane1->push_back(999999.); + pointCoordsInplane2->push_back(999999.); + } + } + } + std::sort(pointCoordsNormal->begin(), pointCoordsNormal->end()); + + // Find all pointCoords in the first plane + for(uint j=1; j<para->getParH(level)->size_Mat_SP; j++ ) + { + if( para->getParH(level)->geoSP[j] == GEO_FLUID && pointCoordsNormal_par[j] == pointCoordsNormal->at(0)) + { + //not needed in current state, might become relevant for two-point correlations + // pointCoordsNormal->push_back( pointCoordsNormal_par[j] ); + // pointCoordsInplane1->push_back( pointCoordsInplane1_par[j] ); + // pointCoordsInplane2->push_back( pointCoordsInplane2_par[j] ); + + probeIndices_level.push_back(j); + } + } +} + +/////////////////////////////////////////////////////////////////////////////////// + +void PlanarAverageProbe::calculateQuantities(SPtr<ProbeStruct> probeStruct, Parameter* para, uint t, int level) +{ + // Definition of normal and inplane directions for moveIndices kernels + uint *neighborNormal, *neighborInplane1, *neighborInplane2; + if( this->planeNormal == 'x' ) + { + neighborNormal = para->getParD(level)->neighborX_SP; + neighborInplane1 = 
para->getParD(level)->neighborY_SP; + neighborInplane2 = para->getParD(level)->neighborZ_SP; + } + if( this->planeNormal == 'y' ) + { + neighborNormal = para->getParD(level)->neighborY_SP; + neighborInplane1 = para->getParD(level)->neighborX_SP; + neighborInplane2 = para->getParD(level)->neighborZ_SP; + } + if( this->planeNormal == 'z' ) + { + neighborNormal = para->getParD(level)->neighborZ_SP; + neighborInplane1 = para->getParD(level)->neighborX_SP; + neighborInplane2 = para->getParD(level)->neighborY_SP; + } + + bool doTmpAveraging = (t>this->getTStartTmpAveraging()); + + // Pointer casts to use device arrays in thrust reductions + thrust::device_ptr<uint> indices_thrust = thrust::device_pointer_cast(probeStruct->pointIndicesD); + thrust::device_ptr<real> vx_thrust = thrust::device_pointer_cast(para->getParD(level)->vx_SP); + thrust::device_ptr<real> vy_thrust = thrust::device_pointer_cast(para->getParD(level)->vy_SP); + thrust::device_ptr<real> vz_thrust = thrust::device_pointer_cast(para->getParD(level)->vz_SP); + + real N = (real)probeStruct->nIndices; + real n = (real)probeStruct->vals; + uint nPoints = probeStruct->nPoints; + // Permutation iterators for direct iteration over the velocities of the planes + typedef thrust::device_vector<real>::iterator valIterator; + typedef thrust::device_vector<uint>::iterator indIterator; + thrust::permutation_iterator<valIterator, indIterator> vx_iter_begin(vx_thrust, indices_thrust); + thrust::permutation_iterator<valIterator, indIterator> vx_iter_end (vx_thrust, indices_thrust+probeStruct->nIndices); + thrust::permutation_iterator<valIterator, indIterator> vy_iter_begin(vy_thrust, indices_thrust); + thrust::permutation_iterator<valIterator, indIterator> vy_iter_end (vy_thrust, indices_thrust+probeStruct->nIndices); + thrust::permutation_iterator<valIterator, indIterator> vz_iter_begin(vz_thrust, indices_thrust); + thrust::permutation_iterator<valIterator, indIterator> vz_iter_end (vz_thrust, 
indices_thrust+probeStruct->nIndices); + + for( uint i=0; i<nPoints; i++ ) + { + uint node = this->isEvenTAvg? i : nPoints-1-i; // Note, loop moves in positive normal dir at even calls and in negative normal dir in odd calls + + if(probeStruct->quantitiesH[int(Statistic::SpatialMeans)]) + { + // Compute the instantaneous spatial means of the velocity moments + real spatMean_vx = thrust::reduce(vx_iter_begin, vx_iter_end)/N; + real spatMean_vy = thrust::reduce(vy_iter_begin, vy_iter_end)/N; + real spatMean_vz = thrust::reduce(vz_iter_begin, vz_iter_end)/N; + + uint arrOff = probeStruct->arrayOffsetsH[int(Statistic::SpatialMeans)]; + probeStruct->quantitiesArrayH[(arrOff+0)*nPoints+node] = spatMean_vx; + probeStruct->quantitiesArrayH[(arrOff+1)*nPoints+node] = spatMean_vy; + probeStruct->quantitiesArrayH[(arrOff+2)*nPoints+node] = spatMean_vz; + + if(probeStruct->quantitiesH[int(Statistic::SpatioTemporalMeans)] && doTmpAveraging) + { + uint arrOff = probeStruct->arrayOffsetsH[int(Statistic::SpatioTemporalMeans)]; + real spatTmpMean_vx_old = probeStruct->quantitiesArrayH[(arrOff+0)*nPoints+node]; + real spatTmpMean_vy_old = probeStruct->quantitiesArrayH[(arrOff+1)*nPoints+node]; + real spatTmpMean_vz_old = probeStruct->quantitiesArrayH[(arrOff+2)*nPoints+node]; + + probeStruct->quantitiesArrayH[(arrOff+0)*nPoints+node] += (spatMean_vx-spatTmpMean_vx_old)/n; + probeStruct->quantitiesArrayH[(arrOff+1)*nPoints+node] += (spatMean_vy-spatTmpMean_vy_old)/n; + probeStruct->quantitiesArrayH[(arrOff+2)*nPoints+node] += (spatMean_vz-spatTmpMean_vz_old)/n; + } + + if(probeStruct->quantitiesH[int(Statistic::SpatialCovariances)]) + { // <u_i' u_j'> = <u_i u_j> - <u_i>*<u_i> + real vx2 = thrust::transform_reduce(vx_iter_begin, vx_iter_end, pow2<real>(), 0.f, thrust::plus<real>())/N; + real vy2 = thrust::transform_reduce(vy_iter_begin, vy_iter_end, pow2<real>(), 0.f, thrust::plus<real>())/N; + real vz2 = thrust::transform_reduce(vz_iter_begin, vz_iter_end, pow2<real>(), 0.f, 
thrust::plus<real>())/N; + real vxvy = thrust::inner_product(vx_iter_begin, vx_iter_end, vy_iter_begin, 0.f)/N; + real vxvz = thrust::inner_product(vx_iter_begin, vx_iter_end, vz_iter_begin, 0.f)/N; + real vyvz = thrust::inner_product(vy_iter_begin, vy_iter_end, vz_iter_begin, 0.f)/N; + real spatMean_vxvx = vx2-spatMean_vx*spatMean_vx; + real spatMean_vyvy = vy2-spatMean_vy*spatMean_vy; + real spatMean_vzvz = vz2-spatMean_vz*spatMean_vz; + real spatMean_vxvy = vxvy-spatMean_vx*spatMean_vy; + real spatMean_vxvz = vxvz-spatMean_vx*spatMean_vz; + real spatMean_vyvz = vyvz-spatMean_vy*spatMean_vz; + + uint arrOff = probeStruct->arrayOffsetsH[int(Statistic::SpatialCovariances)]; + probeStruct->quantitiesArrayH[(arrOff+0)*nPoints+node] = spatMean_vxvx; + probeStruct->quantitiesArrayH[(arrOff+1)*nPoints+node] = spatMean_vyvy; + probeStruct->quantitiesArrayH[(arrOff+2)*nPoints+node] = spatMean_vzvz; + probeStruct->quantitiesArrayH[(arrOff+3)*nPoints+node] = spatMean_vxvy; + probeStruct->quantitiesArrayH[(arrOff+4)*nPoints+node] = spatMean_vxvz; + probeStruct->quantitiesArrayH[(arrOff+5)*nPoints+node] = spatMean_vyvz; + + if(probeStruct->quantitiesH[int(Statistic::SpatioTemporalCovariances)] && doTmpAveraging) + { + uint arrOff = probeStruct->arrayOffsetsH[int(Statistic::SpatioTemporalCovariances)]; + real spatTmpMean_vxvx_old = probeStruct->quantitiesArrayH[(arrOff+0)*nPoints+node]; + real spatTmpMean_vyvy_old = probeStruct->quantitiesArrayH[(arrOff+1)*nPoints+node]; + real spatTmpMean_vzvz_old = probeStruct->quantitiesArrayH[(arrOff+2)*nPoints+node]; + real spatTmpMean_vxvy_old = probeStruct->quantitiesArrayH[(arrOff+3)*nPoints+node]; + real spatTmpMean_vxvz_old = probeStruct->quantitiesArrayH[(arrOff+4)*nPoints+node]; + real spatTmpMean_vyvz_old = probeStruct->quantitiesArrayH[(arrOff+5)*nPoints+node]; + + probeStruct->quantitiesArrayH[(arrOff+0)*nPoints+node] += (spatMean_vxvx-spatTmpMean_vxvx_old)/n; + probeStruct->quantitiesArrayH[(arrOff+1)*nPoints+node] += 
(spatMean_vyvy-spatTmpMean_vyvy_old)/n; + probeStruct->quantitiesArrayH[(arrOff+2)*nPoints+node] += (spatMean_vzvz-spatTmpMean_vzvz_old)/n; + probeStruct->quantitiesArrayH[(arrOff+3)*nPoints+node] += (spatMean_vxvy-spatTmpMean_vxvy_old)/n; + probeStruct->quantitiesArrayH[(arrOff+4)*nPoints+node] += (spatMean_vxvz-spatTmpMean_vxvz_old)/n; + probeStruct->quantitiesArrayH[(arrOff+5)*nPoints+node] += (spatMean_vyvz-spatTmpMean_vyvz_old)/n; + } + + if(probeStruct->quantitiesH[int(Statistic::SpatialSkewness)]) + { // <u_i'^3> = <u_i^3> - <u_i>^3 - 3 <u_i> <u_i'^2> + // real vx3 = thrust::transform_reduce(vx_iter_begin, vx_iter_end, pow3<real>(), 0.f, thrust::plus<real>())/N; + // real vy3 = thrust::transform_reduce(vy_iter_begin, vy_iter_end, pow3<real>(), 0.f, thrust::plus<real>())/N; + // real vz3 = thrust::transform_reduce(vz_iter_begin, vz_iter_end, pow3<real>(), 0.f, thrust::plus<real>())/N; + real spatMean_vxvxvx = thrust::transform_reduce(vx_iter_begin, vx_iter_end, nth_moment(spatMean_vx, 3), 0.f, thrust::plus<real>())/N; + //vx3 - spatMean_vx*spatMean_vx*spatMean_vx - 3*spatMean_vx*spatMean_vxvx; -> alternative only using vx3, etc. but containing some bug. Potentially better in terms of round-off errors. 
+ real spatMean_vyvyvy = thrust::transform_reduce(vy_iter_begin, vy_iter_end, nth_moment(spatMean_vy, 3), 0.f, thrust::plus<real>())/N; + //vy3 - spatMean_vy*spatMean_vy*spatMean_vy - 3*spatMean_vy*spatMean_vzvz; + real spatMean_vzvzvz = thrust::transform_reduce(vz_iter_begin, vz_iter_end, nth_moment(spatMean_vz, 3), 0.f, thrust::plus<real>())/N; + //vz3 - spatMean_vz*spatMean_vz*spatMean_vz - 3*spatMean_vz*spatMean_vzvz; + real spatMean_Sx = spatMean_vxvxvx/pow(spatMean_vxvx, 1.5f); + real spatMean_Sy = spatMean_vyvyvy/pow(spatMean_vyvy, 1.5f); + real spatMean_Sz = spatMean_vzvzvz/pow(spatMean_vzvz, 1.5f); + + uint arrOff = probeStruct->arrayOffsetsH[int(Statistic::SpatialSkewness)]; + probeStruct->quantitiesArrayH[(arrOff+0)*nPoints+node] = spatMean_Sx; + probeStruct->quantitiesArrayH[(arrOff+1)*nPoints+node] = spatMean_Sy; + probeStruct->quantitiesArrayH[(arrOff+2)*nPoints+node] = spatMean_Sz; + + if(probeStruct->quantitiesH[int(Statistic::SpatioTemporalSkewness)] && doTmpAveraging) + { + uint arrOff = probeStruct->arrayOffsetsH[int(Statistic::SpatioTemporalSkewness)]; + real spatTmpMean_Sx_old = probeStruct->quantitiesArrayH[(arrOff+0)*nPoints+node]; + real spatTmpMean_Sy_old = probeStruct->quantitiesArrayH[(arrOff+1)*nPoints+node]; + real spatTmpMean_Sz_old = probeStruct->quantitiesArrayH[(arrOff+2)*nPoints+node]; + + probeStruct->quantitiesArrayH[(arrOff+0)*nPoints+node] += (spatMean_Sx-spatTmpMean_Sx_old)/n; + probeStruct->quantitiesArrayH[(arrOff+1)*nPoints+node] += (spatMean_Sy-spatTmpMean_Sy_old)/n; + probeStruct->quantitiesArrayH[(arrOff+2)*nPoints+node] += (spatMean_Sz-spatTmpMean_Sz_old)/n; + } + + if(probeStruct->quantitiesH[int(Statistic::SpatialFlatness)]) + { // <u_i'^4> = <u_i^4> - <u_i>^4 - 6 <u_i>^2 <u_i'^2> - 4 <u> <u'^3> + // real vx4 = thrust::transform_reduce(vx_iter_begin, vx_iter_end, pow4<real>(), 0.f, thrust::plus<real>())/N; + // real vy4 = thrust::transform_reduce(vy_iter_begin, vy_iter_end, pow4<real>(), 0.f, thrust::plus<real>())/N; 
+ // real vz4 = thrust::transform_reduce(vz_iter_begin, vz_iter_end, pow4<real>(), 0.f, thrust::plus<real>())/N; + real spatMean_vxvxvxvx = thrust::transform_reduce(vx_iter_begin, vx_iter_end, nth_moment(spatMean_vx, 4), 0.f, thrust::plus<real>())/N; //vx4 - spatMean_vx*spatMean_vx*spatMean_vx*spatMean_vx - 6*spatMean_vx*spatMean_vx*vx2 - 4*spatMean_vx*vx3; + real spatMean_vyvyvyvy = thrust::transform_reduce(vy_iter_begin, vy_iter_end, nth_moment(spatMean_vy, 4), 0.f, thrust::plus<real>())/N; //vy4 - spatMean_vy*spatMean_vy*spatMean_vy*spatMean_vy - 6*spatMean_vy*spatMean_vx*vy2 - 4*spatMean_vy*vy3; + real spatMean_vzvzvzvz = thrust::transform_reduce(vz_iter_begin, vz_iter_end, nth_moment(spatMean_vz, 4), 0.f, thrust::plus<real>())/N; //vz4 - spatMean_vz*spatMean_vz*spatMean_vz*spatMean_vz - 6*spatMean_vz*spatMean_vx*vz2 - 4*spatMean_vz*vz3; + real spatMean_Fx = spatMean_vxvxvxvx/(spatMean_vxvx*spatMean_vxvx); + real spatMean_Fy = spatMean_vyvyvyvy/(spatMean_vyvy*spatMean_vyvy); + real spatMean_Fz = spatMean_vzvzvzvz/(spatMean_vzvz*spatMean_vzvz); + + uint arrOff = probeStruct->arrayOffsetsH[int(Statistic::SpatialFlatness)]; + probeStruct->quantitiesArrayH[(arrOff+0)*nPoints+node] = spatMean_Fx; + probeStruct->quantitiesArrayH[(arrOff+1)*nPoints+node] = spatMean_Fy; + probeStruct->quantitiesArrayH[(arrOff+2)*nPoints+node] = spatMean_Fz; + + if(probeStruct->quantitiesH[int(Statistic::SpatioTemporalFlatness)] && doTmpAveraging) + { + uint arrOff = probeStruct->arrayOffsetsH[int(Statistic::SpatioTemporalFlatness)]; + real spatTmpMean_Fx_old = probeStruct->quantitiesArrayH[(arrOff+0)*nPoints+node]; + real spatTmpMean_Fy_old = probeStruct->quantitiesArrayH[(arrOff+1)*nPoints+node]; + real spatTmpMean_Fz_old = probeStruct->quantitiesArrayH[(arrOff+2)*nPoints+node]; + + probeStruct->quantitiesArrayH[(arrOff+0)*nPoints+node] += (spatMean_Fx-spatTmpMean_Fx_old)/n; + probeStruct->quantitiesArrayH[(arrOff+1)*nPoints+node] += (spatMean_Fy-spatTmpMean_Fy_old)/n; + 
probeStruct->quantitiesArrayH[(arrOff+2)*nPoints+node] += (spatMean_Fz-spatTmpMean_Fz_old)/n; + } + } + } + } + } + if(i<probeStruct->nPoints-1) + { + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(para->getParH(level)->numberofthreads, probeStruct->nIndices); + if(this->isEvenTAvg) + moveIndicesInPosNormalDir<<<grid.grid, grid.threads>>>( probeStruct->pointIndicesD, probeStruct->nIndices, neighborNormal, para->getParD(level)->coordX_SP, para->getParD(level)->coordY_SP, para->getParD(level)->coordZ_SP ); + else + moveIndicesInNegNormalDir<<<grid.grid, grid.threads>>>( probeStruct->pointIndicesD, probeStruct->nIndices, para->getParD(level)->neighborWSB_SP, neighborInplane1, neighborInplane2, para->getParD(level)->coordX_SP, para->getParD(level)->coordY_SP, para->getParD(level)->coordZ_SP ); + } + } + this->isEvenTAvg=!this->isEvenTAvg; + + getLastCudaError("PlanarAverageProbe::calculateQuantities execution failed"); +} diff --git a/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/PlanarAverageProbe.h b/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/PlanarAverageProbe.h new file mode 100644 index 0000000000000000000000000000000000000000..7054f5fc7e02453418285281a0ea9cf9c32dc0c0 --- /dev/null +++ b/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/PlanarAverageProbe.h @@ -0,0 +1,97 @@ +//======================================================================================= +// ____ ____ __ ______ __________ __ __ __ __ +// \ \ | | | | | _ \ |___ ___| | | | | / \ | | +// \ \ | | | | | |_) | | | | | | | / \ | | +// \ \ | | | | | _ / | | | | | | / /\ \ | | +// \ \ | | | | | | \ \ | | | \__/ | / ____ \ | |____ +// \ \ | | |__| |__| \__\ |__| \________/ /__/ \__\ |_______| +// \ \ | | ________________________________________________________________ +// \ \ | | | ______________________________________________________________| +// \ \| | | | __ __ __ __ ______ _______ +// \ | | |_____ | | | | | | | | | _ \ / _____) +// \ | | _____| | | | | | | | | | | \ 
\ \_______ +// \ | | | | |_____ | \_/ | | | | |_/ / _____ | +// \ _____| |__| |________| \_______/ |__| |______/ (_______/ +// +// This file is part of VirtualFluids. VirtualFluids is free software: you can +// redistribute it and/or modify it under the terms of the GNU General Public +// License as published by the Free Software Foundation, either version 3 of +// the License, or (at your option) any later version. +// +// VirtualFluids is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// for more details. +// +// You should have received a copy of the GNU General Public License along +// with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>. +// +//! \file PlanarAverageProbe.h +//! \author Henrik Asmuth +//! \date 13/05/2022 +//! \brief Probe computing statistics across planes spanning the entire domain +//! +//! Computes spatial statistics across x, y or z-normal planes defined by planeNormal. +//! The planes include all points of the domain at each respective position along that normal direction. +//! The spatial statistics can additionally be averaged in time. +//! 
+//======================================================================================= + +#ifndef PlanarAverageProbe_H +#define PlanarAverageProbe_H + +#include "Probe.h" + +__global__ void moveIndicesInNegNormalDir( uint* pointIndices, uint nPoints, uint* neighborWSB, uint* neighborInplane1, uint* neighborInplane2, real* coordsX, real* coordsY, real* coordsZ ); + +__global__ void moveIndicesInPosNormalDir( uint* pointIndices, uint nPoints, uint* neighborNormal, real* coordsX, real* coordsY, real* coordsZ ); + +/////////////////////////////////////////////////////////////////////////////////// + +class PlanarAverageProbe : public Probe +{ +public: + PlanarAverageProbe( + const std::string _probeName, + const std::string _outputPath, + uint _tStartAvg, + uint _tStartTmpAvg, + uint _tAvg, + uint _tStartOut, + uint _tOut, + char _planeNormal + ): Probe(_probeName, + _outputPath, + _tStartAvg, + _tStartTmpAvg, + _tAvg, + _tStartOut, + _tOut, + false, + false), + planeNormal(_planeNormal) + + { + assert(_planeNormal == 'x' || _planeNormal == 'y' || _planeNormal == 'z'); + } + + +private: + bool isAvailableStatistic(Statistic _variable) override; + + std::vector<PostProcessingVariable> getPostProcessingVariables(Statistic variable) override; + + void findPoints(Parameter* para, GridProvider* gridProvider, std::vector<int>& probeIndices_level, + std::vector<real>& distX_level, std::vector<real>& distY_level, std::vector<real>& distZ_level, + std::vector<real>& pointCoordsX_level, std::vector<real>& pointCoordsY_level, std::vector<real>& pointCoordsZ_level, + int level) override; + void calculateQuantities(SPtr<ProbeStruct> probeStruct, Parameter* para, uint t, int level) override; + +private: + real posX, posY, posZ; + real deltaX, deltaY, deltaZ; + char planeNormal; + bool isEvenTAvg = true; +}; + +#endif \ No newline at end of file diff --git a/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/PlaneProbe.cu 
b/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/PlaneProbe.cu index cf03d639add0c883793c6ffad041e7b6da6d98d3..15f10d8203a5d688da7f6bd18a976eaad5776b5c 100644 --- a/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/PlaneProbe.cu +++ b/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/PlaneProbe.cu @@ -1,3 +1,4 @@ +#include "Probe.h" #include "PlaneProbe.h" #include <cuda/CudaGrid.h> @@ -10,6 +11,66 @@ #include "DataStructureInitializer/GridProvider.h" #include "GPU/CudaMemoryManager.h" + +bool PlaneProbe::isAvailableStatistic(Statistic _variable) +{ + bool isAvailable; + switch (_variable) + { + case Statistic::Instantaneous: + case Statistic::Means: + case Statistic::Variances: + isAvailable = true; + break; + case Statistic::SpatialMeans: + case Statistic::SpatioTemporalMeans: + case Statistic::SpatialCovariances: + case Statistic::SpatioTemporalCovariances: + case Statistic::SpatialSkewness: + case Statistic::SpatioTemporalSkewness: + case Statistic::SpatialFlatness: + case Statistic::SpatioTemporalFlatness: + isAvailable = false; + break; + default: + isAvailable = false; + } + return isAvailable; +} + + +std::vector<PostProcessingVariable> PlaneProbe::getPostProcessingVariables(Statistic statistic) +{ + std::vector<PostProcessingVariable> postProcessingVariables; + switch (statistic) + { + case Statistic::Instantaneous: + postProcessingVariables.push_back( PostProcessingVariable("vx", this->velocityRatio) ); + postProcessingVariables.push_back( PostProcessingVariable("vy", this->velocityRatio) ); + postProcessingVariables.push_back( PostProcessingVariable("vz", this->velocityRatio) ); + postProcessingVariables.push_back( PostProcessingVariable("rho", this->densityRatio ) ); + break; + case Statistic::Means: + postProcessingVariables.push_back( PostProcessingVariable("vx_mean", this->velocityRatio) ); + postProcessingVariables.push_back( PostProcessingVariable("vy_mean", this->velocityRatio) ); + postProcessingVariables.push_back( 
PostProcessingVariable("vz_mean", this->velocityRatio) ); + postProcessingVariables.push_back( PostProcessingVariable("rho_mean", this->densityRatio ) ); + break; + case Statistic::Variances: + postProcessingVariables.push_back( PostProcessingVariable("vx_var", pow(this->velocityRatio, 2.0)) ); + postProcessingVariables.push_back( PostProcessingVariable("vy_var", pow(this->velocityRatio, 2.0)) ); + postProcessingVariables.push_back( PostProcessingVariable("vz_var", pow(this->velocityRatio, 2.0)) ); + postProcessingVariables.push_back( PostProcessingVariable("rho_var", pow(this->densityRatio, 2.0)) ); + break; + + default: + printf("Statistic unavailable in PlaneProbe\n"); + assert(false); + break; + } + return postProcessingVariables; +} + void PlaneProbe::findPoints(Parameter* para, GridProvider* gridProvider, std::vector<int>& probeIndices_level, std::vector<real>& distX_level, std::vector<real>& distY_level, std::vector<real>& distZ_level, std::vector<real>& pointCoordsX_level, std::vector<real>& pointCoordsY_level, std::vector<real>& pointCoordsZ_level, @@ -39,13 +100,11 @@ void PlaneProbe::findPoints(Parameter* para, GridProvider* gridProvider, std::ve } } -void PlaneProbe::calculateQuantities(SPtr<ProbeStruct> probeStruct, Parameter* para, int level) +void PlaneProbe::calculateQuantities(SPtr<ProbeStruct> probeStruct, Parameter* para, uint t, int level) { vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(para->getParH(level)->numberofthreads, probeStruct->nPoints); - interpQuantities<<<grid.grid, grid.threads>>>( probeStruct->pointIndicesD, probeStruct->nPoints, probeStruct->vals, - probeStruct->distXD, probeStruct->distYD, probeStruct->distZD, - para->getParD(level)->vx_SP, para->getParD(level)->vy_SP, para->getParD(level)->vz_SP, para->getParD(level)->rho_SP, - para->getParD(level)->neighborX_SP, para->getParD(level)->neighborY_SP, para->getParD(level)->neighborZ_SP, - probeStruct->quantitiesD, probeStruct->arrayOffsetsD, probeStruct->quantitiesArrayD, false); - 
+ calcQuantitiesKernel<<<grid.grid, grid.threads>>>( probeStruct->pointIndicesD, probeStruct->nPoints, probeStruct->vals, + para->getParD(level)->vx_SP, para->getParD(level)->vy_SP, para->getParD(level)->vz_SP, para->getParD(level)->rho_SP, + para->getParD(level)->neighborX_SP, para->getParD(level)->neighborY_SP, para->getParD(level)->neighborZ_SP, + probeStruct->quantitiesD, probeStruct->arrayOffsetsD, probeStruct->quantitiesArrayD); } \ No newline at end of file diff --git a/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/PlaneProbe.h b/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/PlaneProbe.h index 1eb8197d4fcaa2ee44fd929af913c3c187a3dcdf..3440c01020f9b3505be7148024e47373b76648ff 100644 --- a/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/PlaneProbe.h +++ b/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/PlaneProbe.h @@ -1,3 +1,41 @@ +//======================================================================================= +// ____ ____ __ ______ __________ __ __ __ __ +// \ \ | | | | | _ \ |___ ___| | | | | / \ | | +// \ \ | | | | | |_) | | | | | | | / \ | | +// \ \ | | | | | _ / | | | | | | / /\ \ | | +// \ \ | | | | | | \ \ | | | \__/ | / ____ \ | |____ +// \ \ | | |__| |__| \__\ |__| \________/ /__/ \__\ |_______| +// \ \ | | ________________________________________________________________ +// \ \ | | | ______________________________________________________________| +// \ \| | | | __ __ __ __ ______ _______ +// \ | | |_____ | | | | | | | | | _ \ / _____) +// \ | | _____| | | | | | | | | | | \ \ \_______ +// \ | | | | |_____ | \_/ | | | | |_/ / _____ | +// \ _____| |__| |________| \_______/ |__| |______/ (_______/ +// +// This file is part of VirtualFluids. VirtualFluids is free software: you can +// redistribute it and/or modify it under the terms of the GNU General Public +// License as published by the Free Software Foundation, either version 3 of +// the License, or (at your option) any later version. 
+// +// VirtualFluids is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// for more details. +// +// You should have received a copy of the GNU General Public License along +// with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>. +// +//! \file PlaneProbe.h +//! \author Henry Korb, Henrik Asmuth +//! \date 13/05/2022 +//! \brief Probe computing point-wise statistics for a set of points across a plane +//! +//! The set of points can be defined by providing a list or on an x-normal plane. +//! All statistics are temporal. +//! +//======================================================================================= + #ifndef PlaneProbe_H #define PlaneProbe_H @@ -10,13 +48,18 @@ public: const std::string _probeName, const std::string _outputPath, uint _tStartAvg, + uint _tAvg, uint _tStartOut, uint _tOut ): Probe(_probeName, _outputPath, _tStartAvg, + 0, + _tAvg, _tStartOut, - _tOut) + _tOut, + true, + false) {} void setProbePlane(real _posX, real _posY, real _posZ, real _deltaX, real _deltaY, real _deltaZ) @@ -30,11 +73,15 @@ public: } private: + bool isAvailableStatistic(Statistic _variable) override; + + std::vector<PostProcessingVariable> getPostProcessingVariables(Statistic variable) override; + void findPoints(Parameter* para, GridProvider* gridProvider, std::vector<int>& probeIndices_level, std::vector<real>& distX_level, std::vector<real>& distY_level, std::vector<real>& distZ_level, std::vector<real>& pointCoordsX_level, std::vector<real>& pointCoordsY_level, std::vector<real>& pointCoordsZ_level, int level) override; - void calculateQuantities(SPtr<ProbeStruct> probeStruct, Parameter* para, int level) override; + void calculateQuantities(SPtr<ProbeStruct> probeStruct, Parameter* para, uint t, int level) override; private: real posX, posY, posZ; diff --git 
a/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/PointProbe.cu b/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/PointProbe.cu index 76467d8da942cb189516571db66a473e5c4c32d5..7c0b5947a03330997678b55d7d8063685dca4e1c 100644 --- a/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/PointProbe.cu +++ b/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/PointProbe.cu @@ -1,3 +1,4 @@ +#include "Probe.h" #include "PointProbe.h" #include <cuda.h> @@ -10,6 +11,64 @@ #include "DataStructureInitializer/GridProvider.h" #include "GPU/CudaMemoryManager.h" +bool PointProbe::isAvailableStatistic(Statistic _variable) +{ + bool isAvailable; + switch (_variable) + { + case Statistic::Instantaneous: + case Statistic::Means: + case Statistic::Variances: + isAvailable = true; + break; + case Statistic::SpatialMeans: + case Statistic::SpatioTemporalMeans: + case Statistic::SpatialCovariances: + case Statistic::SpatioTemporalCovariances: + case Statistic::SpatialSkewness: + case Statistic::SpatioTemporalSkewness: + case Statistic::SpatialFlatness: + case Statistic::SpatioTemporalFlatness: + isAvailable = false; + break; + default: + isAvailable = false; + } + return isAvailable; +} + +std::vector<PostProcessingVariable> PointProbe::getPostProcessingVariables(Statistic statistic) +{ + std::vector<PostProcessingVariable> postProcessingVariables; + switch (statistic) + { + case Statistic::Instantaneous: + postProcessingVariables.push_back( PostProcessingVariable("vx", velocityRatio) ); + postProcessingVariables.push_back( PostProcessingVariable("vy", this->velocityRatio) ); + postProcessingVariables.push_back( PostProcessingVariable("vz", this->velocityRatio) ); + postProcessingVariables.push_back( PostProcessingVariable("rho", this->densityRatio ) ); + break; + case Statistic::Means: + postProcessingVariables.push_back( PostProcessingVariable("vx_mean", this->velocityRatio) ); + postProcessingVariables.push_back( PostProcessingVariable("vy_mean", 
this->velocityRatio) ); + postProcessingVariables.push_back( PostProcessingVariable("vz_mean", this->velocityRatio) ); + postProcessingVariables.push_back( PostProcessingVariable("rho_mean", this->densityRatio ) ); + break; + case Statistic::Variances: + postProcessingVariables.push_back( PostProcessingVariable("vx_var", pow(this->velocityRatio, 2.0)) ); + postProcessingVariables.push_back( PostProcessingVariable("vy_var", pow(this->velocityRatio, 2.0)) ); + postProcessingVariables.push_back( PostProcessingVariable("vz_var", pow(this->velocityRatio, 2.0)) ); + postProcessingVariables.push_back( PostProcessingVariable("rho_var", pow(this->densityRatio, 2.0)) ); + break; + + default: + printf("Statistic unavailable in PointProbe\n"); + assert(false); + break; + } + return postProcessingVariables; +} + void PointProbe::findPoints(Parameter* para, GridProvider* gridProvider, std::vector<int>& probeIndices_level, std::vector<real>& distX_level, std::vector<real>& distY_level, std::vector<real>& distZ_level, std::vector<real>& pointCoordsX_level, std::vector<real>& pointCoordsY_level, std::vector<real>& pointCoordsZ_level, @@ -42,15 +101,14 @@ void PointProbe::findPoints(Parameter* para, GridProvider* gridProvider, std::ve } } -void PointProbe::calculateQuantities(SPtr<ProbeStruct> probeStruct, Parameter* para, int level) +void PointProbe::calculateQuantities(SPtr<ProbeStruct> probeStruct, Parameter* para, uint t, int level) { vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(para->getParH(level)->numberofthreads, probeStruct->nPoints); - - interpQuantities<<<grid.grid, grid.threads>>>( probeStruct->pointIndicesD, probeStruct->nPoints, probeStruct->vals, - probeStruct->distXD, probeStruct->distYD, probeStruct->distZD, - para->getParD(level)->vx_SP, para->getParD(level)->vy_SP, para->getParD(level)->vz_SP, para->getParD(level)->rho_SP, - para->getParD(level)->neighborX_SP, para->getParD(level)->neighborY_SP, para->getParD(level)->neighborZ_SP, - probeStruct->quantitiesD, 
probeStruct->arrayOffsetsD, probeStruct->quantitiesArrayD, true); + interpAndCalcQuantitiesKernel<<<grid.grid, grid.threads>>>( probeStruct->pointIndicesD, probeStruct->nPoints, probeStruct->vals, + probeStruct->distXD, probeStruct->distYD, probeStruct->distZD, + para->getParD(level)->vx_SP, para->getParD(level)->vy_SP, para->getParD(level)->vz_SP, para->getParD(level)->rho_SP, + para->getParD(level)->neighborX_SP, para->getParD(level)->neighborY_SP, para->getParD(level)->neighborZ_SP, + probeStruct->quantitiesD, probeStruct->arrayOffsetsD, probeStruct->quantitiesArrayD); } void PointProbe::addProbePointsFromList(std::vector<real>& _pointCoordsX, std::vector<real>& _pointCoordsY, std::vector<real>& _pointCoordsZ) diff --git a/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/PointProbe.h b/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/PointProbe.h index c64b2e592bd9b766d0a5bb1553c76d43e433b455..6a6fbe76f089acfafc22672dd3e9d71bd193a3b3 100644 --- a/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/PointProbe.h +++ b/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/PointProbe.h @@ -1,3 +1,41 @@ +//======================================================================================= +// ____ ____ __ ______ __________ __ __ __ __ +// \ \ | | | | | _ \ |___ ___| | | | | / \ | | +// \ \ | | | | | |_) | | | | | | | / \ | | +// \ \ | | | | | _ / | | | | | | / /\ \ | | +// \ \ | | | | | | \ \ | | | \__/ | / ____ \ | |____ +// \ \ | | |__| |__| \__\ |__| \________/ /__/ \__\ |_______| +// \ \ | | ________________________________________________________________ +// \ \ | | | ______________________________________________________________| +// \ \| | | | __ __ __ __ ______ _______ +// \ | | |_____ | | | | | | | | | _ \ / _____) +// \ | | _____| | | | | | | | | | | \ \ \_______ +// \ | | | | |_____ | \_/ | | | | |_/ / _____ | +// \ _____| |__| |________| \_______/ |__| |______/ (_______/ +// +// This file is part of VirtualFluids. 
VirtualFluids is free software: you can +// redistribute it and/or modify it under the terms of the GNU General Public +// License as published by the Free Software Foundation, either version 3 of +// the License, or (at your option) any later version. +// +// VirtualFluids is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// for more details. +// +// You should have received a copy of the GNU General Public License along +// with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>. +// +//! \file PointProbe.h +//! \author Henry Korb, Henrik Asmuth +//! \date 13/05/2022 +//! \brief Probe computing statistics for a set of points in space +//! +//! The set of points can be defined by providing a list or on an x-normal plane (the latter being somewhat redundant with PlaneProbe) +//! All statistics are temporal. +//! 
+//======================================================================================= + #ifndef PointProbe_H #define PointProbe_H @@ -10,25 +48,34 @@ public: const std::string _probeName, const std::string _outputPath, uint _tStartAvg, + uint _tAvg, uint _tStartOut, uint _tOut ): Probe(_probeName, _outputPath, _tStartAvg, + 0, + _tAvg, _tStartOut, - _tOut) + _tOut, + true, + false) {} void addProbePointsFromList(std::vector<real>& _pointCoordsX, std::vector<real>& _pointCoordsY, std::vector<real>& _pointCoordsZ); void addProbePointsFromXNormalPlane(real pos_x, real pos0_y, real pos0_z, real pos1_y, real pos1_z, uint n_y, uint n_z); private: + bool isAvailableStatistic(Statistic _variable) override; + + std::vector<PostProcessingVariable> getPostProcessingVariables(Statistic variable) override; + void findPoints(Parameter* para, GridProvider* gridProvider, std::vector<int>& probeIndices_level, std::vector<real>& distX_level, std::vector<real>& distY_level, std::vector<real>& distZ_level, std::vector<real>& pointCoordsX_level, std::vector<real>& pointCoordsY_level, std::vector<real>& pointCoordsZ_level, int level) override; - void calculateQuantities(SPtr<ProbeStruct> probeStruct, Parameter* para, int level) override; + void calculateQuantities(SPtr<ProbeStruct> probeStruct, Parameter* para, uint t, int level) override; private: std::vector<real> pointCoordsX, pointCoordsY, pointCoordsZ; diff --git a/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/Probe.cu b/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/Probe.cu index 1875ef83b9bd388f16cf7d63fe4c3af2968a9113..17679f4ff5292f83f8a6758aa55e588db7042472 100644 --- a/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/Probe.cu +++ b/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/Probe.cu @@ -1,3 +1,35 @@ +//======================================================================================= +// ____ ____ __ ______ __________ __ __ __ __ +// \ \ | | | | | _ \ |___ ___| | | | | / \ | | 
+// \ \ | | | | | |_) | | | | | | | / \ | | +// \ \ | | | | | _ / | | | | | | / /\ \ | | +// \ \ | | | | | | \ \ | | | \__/ | / ____ \ | |____ +// \ \ | | |__| |__| \__\ |__| \________/ /__/ \__\ |_______| +// \ \ | | ________________________________________________________________ +// \ \ | | | ______________________________________________________________| +// \ \| | | | __ __ __ __ ______ _______ +// \ | | |_____ | | | | | | | | | _ \ / _____) +// \ | | _____| | | | | | | | | | | \ \ \_______ +// \ | | | | |_____ | \_/ | | | | |_/ / _____ | +// \ _____| |__| |________| \_______/ |__| |______/ (_______/ +// +// This file is part of VirtualFluids. VirtualFluids is free software: you can +// redistribute it and/or modify it under the terms of the GNU General Public +// License as published by the Free Software Foundation, either version 3 of +// the License, or (at your option) any later version. +// +// VirtualFluids is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// for more details. +// +// You should have received a copy of the GNU General Public License along +// with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>. +// +//! \file Probe.h +//! 
\author Henry Korb, Henrik Asmuth +//======================================================================================= + #include "Probe.h" #include <cuda.h> @@ -13,54 +45,25 @@ #include "GPU/CudaMemoryManager.h" -std::vector<std::string> getPostProcessingVariableNames(PostProcessingVariable variable) -{ - std::vector<std::string> varNames; - switch (variable) - { - case PostProcessingVariable::Instantaneous: - varNames.push_back("vx"); - varNames.push_back("vy"); - varNames.push_back("vz"); - varNames.push_back("rho"); - break; - case PostProcessingVariable::Means: - varNames.push_back("vx_mean"); - varNames.push_back("vy_mean"); - varNames.push_back("vz_mean"); - varNames.push_back("rho_mean"); - break; - case PostProcessingVariable::Variances: - varNames.push_back("vx_var"); - varNames.push_back("vy_var"); - varNames.push_back("vz_var"); - varNames.push_back("rho_var"); - break; - default: - break; - } - return varNames; -} - -__device__ void calculateQuantities(uint n, real* quantityArray, bool* quantities, uint* quantityArrayOffsets, uint nPoints, uint node, real vx, real vy, real vz, real rho) +__device__ void calculatePointwiseQuantities(uint n, real* quantityArray, bool* quantities, uint* quantityArrayOffsets, uint nPoints, uint node, real vx, real vy, real vz, real rho) { //"https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Welford's_online_algorithm" // also has extensions for higher order and covariances real inv_n = 1/real(n); - if(quantities[int(PostProcessingVariable::Instantaneous)]) + if(quantities[int(Statistic::Instantaneous)]) { - uint arrOff = quantityArrayOffsets[int(PostProcessingVariable::Instantaneous)]; + uint arrOff = quantityArrayOffsets[int(Statistic::Instantaneous)]; quantityArray[(arrOff+0)*nPoints+node] = vx; quantityArray[(arrOff+1)*nPoints+node] = vy; quantityArray[(arrOff+2)*nPoints+node] = vz; quantityArray[(arrOff+3)*nPoints+node] = rho; } - if(quantities[int(PostProcessingVariable::Means)]) + 
if(quantities[int(Statistic::Means)]) { - uint arrOff = quantityArrayOffsets[int(PostProcessingVariable::Means)]; + uint arrOff = quantityArrayOffsets[int(Statistic::Means)]; real vx_m_old = quantityArray[(arrOff+0)*nPoints+node]; real vy_m_old = quantityArray[(arrOff+1)*nPoints+node]; real vz_m_old = quantityArray[(arrOff+2)*nPoints+node]; @@ -76,9 +79,9 @@ __device__ void calculateQuantities(uint n, real* quantityArray, bool* quantitie quantityArray[(arrOff+2)*nPoints+node] = vz_m_new; quantityArray[(arrOff+3)*nPoints+node] = rho_m_new; - if(quantities[int(PostProcessingVariable::Variances)]) + if(quantities[int(Statistic::Variances)]) { - arrOff = quantityArrayOffsets[int(PostProcessingVariable::Variances)]; + arrOff = quantityArrayOffsets[int(Statistic::Variances)]; real vx_var_old = quantityArray[(arrOff+0)*nPoints+node]; real vy_var_old = quantityArray[(arrOff+1)*nPoints+node]; @@ -98,14 +101,12 @@ __device__ void calculateQuantities(uint n, real* quantityArray, bool* quantitie } } -__global__ void interpQuantities( uint* pointIndices, +__global__ void calcQuantitiesKernel( uint* pointIndices, uint nPoints, uint n, - real* distX, real* distY, real* distZ, real* vx, real* vy, real* vz, real* rho, uint* neighborX, uint* neighborY, uint* neighborZ, bool* quantities, - uint* quantityArrayOffsets, real* quantityArray, - bool interpolate + uint* quantityArrayOffsets, real* quantityArray ) { const uint x = threadIdx.x; @@ -124,35 +125,65 @@ __global__ void interpQuantities( uint* pointIndices, uint k = pointIndices[node]; real u_interpX, u_interpY, u_interpZ, rho_interp; - if(interpolate) - { - uint ke, kn, kt, kne, kte, ktn, ktne; - getNeighborIndicesOfBSW( k, ke, kn, kt, kne, kte, ktn, ktne, neighborX, neighborY, neighborZ); + u_interpX = vx[k]; + u_interpY = vy[k]; + u_interpZ = vz[k]; + rho_interp = rho[k]; - // Trilinear interpolation of macroscopic quantities to probe point - real dW, dE, dN, dS, dT, dB; - getInterpolationWeights(dW, dE, dN, dS, dT, dB, 
distX[node], distY[node], distZ[node]); + calculatePointwiseQuantities(n, quantityArray, quantities, quantityArrayOffsets, nPoints, node, u_interpX, u_interpY, u_interpZ, rho_interp); +} - u_interpX = trilinearInterpolation( dW, dE, dN, dS, dT, dB, k, ke, kn, kt, kne, kte, ktn, ktne, vx ); - u_interpY = trilinearInterpolation( dW, dE, dN, dS, dT, dB, k, ke, kn, kt, kne, kte, ktn, ktne, vy ); - u_interpZ = trilinearInterpolation( dW, dE, dN, dS, dT, dB, k, ke, kn, kt, kne, kte, ktn, ktne, vz ); - rho_interp = trilinearInterpolation( dW, dE, dN, dS, dT, dB, k, ke, kn, kt, kne, kte, ktn, ktne, rho ); - } - else - { - u_interpX = vx[k]; - u_interpY = vy[k]; - u_interpZ = vz[k]; - rho_interp = rho[k]; - } +__global__ void interpAndCalcQuantitiesKernel( uint* pointIndices, + uint nPoints, uint n, + real* distX, real* distY, real* distZ, + real* vx, real* vy, real* vz, real* rho, + uint* neighborX, uint* neighborY, uint* neighborZ, + bool* quantities, + uint* quantityArrayOffsets, real* quantityArray + ) +{ + const uint x = threadIdx.x; + const uint y = blockIdx.x; + const uint z = blockIdx.y; + + const uint nx = blockDim.x; + const uint ny = gridDim.x; + + const uint node = nx*(ny*z + y) + x; + + if(node>=nPoints) return; + + // Get indices of neighbor nodes. 
+ // node referring to BSW cell as seen from probe point + uint k = pointIndices[node]; + real u_interpX, u_interpY, u_interpZ, rho_interp; + + uint ke, kn, kt, kne, kte, ktn, ktne; + getNeighborIndicesOfBSW( k, ke, kn, kt, kne, kte, ktn, ktne, neighborX, neighborY, neighborZ); + + // Trilinear interpolation of macroscopic quantities to probe point + real dW, dE, dN, dS, dT, dB; + getInterpolationWeights(dW, dE, dN, dS, dT, dB, distX[node], distY[node], distZ[node]); - calculateQuantities(n, quantityArray, quantities, quantityArrayOffsets, nPoints, node, u_interpX, u_interpY, u_interpZ, rho_interp); + u_interpX = trilinearInterpolation( dW, dE, dN, dS, dT, dB, k, ke, kn, kt, kne, kte, ktn, ktne, vx ); + u_interpY = trilinearInterpolation( dW, dE, dN, dS, dT, dB, k, ke, kn, kt, kne, kte, ktn, ktne, vy ); + u_interpZ = trilinearInterpolation( dW, dE, dN, dS, dT, dB, k, ke, kn, kt, kne, kte, ktn, ktne, vz ); + rho_interp = trilinearInterpolation( dW, dE, dN, dS, dT, dB, k, ke, kn, kt, kne, kte, ktn, ktne, rho ); + + calculatePointwiseQuantities(n, quantityArray, quantities, quantityArrayOffsets, nPoints, node, u_interpX, u_interpY, u_interpZ, rho_interp); } +bool Probe::getHasDeviceQuantityArray(){ return this->hasDeviceQuantityArray; } + void Probe::init(Parameter* para, GridProvider* gridProvider, CudaMemoryManager* cudaManager) { + this->velocityRatio = para->getVelocityRatio(); + this->densityRatio = para->getDensityRatio(); + this->forceRatio = para->getForceRatio(); + this->stressRatio = para->getDensityRatio()*pow(para->getVelocityRatio(), 2.0); + this->accelerationRatio = para->getVelocityRatio()/para->getTimeRatio(); probeParams.resize(para->getMaxLevel()+1); @@ -184,7 +215,8 @@ void Probe::addProbeStruct(CudaMemoryManager* cudaManager, std::vector<int>& pro { probeParams[level] = SPtr<ProbeStruct>(new ProbeStruct); probeParams[level]->vals = 1; - probeParams[level]->nPoints = uint(probeIndices.size()); + probeParams[level]->nPoints = 
uint(pointCoordsX.size()); // Note, need to have both nPoints and nIndices because they differ in PlanarAverage + probeParams[level]->nIndices = uint(probeIndices.size()); probeParams[level]->pointCoordsX = (real*)malloc(probeParams[level]->nPoints*sizeof(real)); probeParams[level]->pointCoordsY = (real*)malloc(probeParams[level]->nPoints*sizeof(real)); @@ -194,30 +226,34 @@ void Probe::addProbeStruct(CudaMemoryManager* cudaManager, std::vector<int>& pro std::copy(pointCoordsY.begin(), pointCoordsY.end(), probeParams[level]->pointCoordsY); std::copy(pointCoordsZ.begin(), pointCoordsZ.end(), probeParams[level]->pointCoordsZ); - // Might have to catch nPoints=0 ?!?! - cudaManager->cudaAllocProbeDistances(this, level); + // Note, dist only needed for kernels that do interpolate + if( distX.size()>0 && distY.size()>0 && distZ.size()>0 ) + { + probeParams[level]->hasDistances=true; + cudaManager->cudaAllocProbeDistances(this, level); + std::copy(distX.begin(), distX.end(), probeParams[level]->distXH); + std::copy(distY.begin(), distY.end(), probeParams[level]->distYH); + std::copy(distZ.begin(), distZ.end(), probeParams[level]->distZH); + cudaManager->cudaCopyProbeDistancesHtoD(this, level); + } + cudaManager->cudaAllocProbeIndices(this, level); - - std::copy(distX.begin(), distX.end(), probeParams[level]->distXH); - std::copy(distY.begin(), distY.end(), probeParams[level]->distYH); - std::copy(distZ.begin(), distZ.end(), probeParams[level]->distZH); std::copy(probeIndices.begin(), probeIndices.end(), probeParams[level]->pointIndicesH); - - cudaManager->cudaCopyProbeDistancesHtoD(this, level); cudaManager->cudaCopyProbeIndicesHtoD(this, level); uint arrOffset = 0; cudaManager->cudaAllocProbeQuantitiesAndOffsets(this, level); - for( int var=0; var<int(PostProcessingVariable::LAST); var++){ - if(this->quantities[var]) + for( int var=0; var<int(Statistic::LAST); var++) { - - probeParams[level]->quantitiesH[var] = true; - probeParams[level]->arrayOffsetsH[var] = arrOffset; 
- arrOffset += uint(getPostProcessingVariableNames(static_cast<PostProcessingVariable>(var)).size()); - }} + if(this->quantities[var]) + { + probeParams[level]->quantitiesH[var] = true; + probeParams[level]->arrayOffsetsH[var] = arrOffset; + arrOffset += uint( this->getPostProcessingVariables(static_cast<Statistic>(var)).size() ); + } + } cudaManager->cudaCopyProbeQuantitiesAndOffsetsHtoD(this, level); @@ -232,53 +268,67 @@ void Probe::addProbeStruct(CudaMemoryManager* cudaManager, std::vector<int>& pro probeParams[level]->quantitiesArrayH[arr*probeParams[level]->nPoints+point] = 0.0f; } } - cudaManager->cudaCopyProbeQuantityArrayHtoD(this, level); + if(this->hasDeviceQuantityArray) + cudaManager->cudaCopyProbeQuantityArrayHtoD(this, level); } void Probe::interact(Parameter* para, CudaMemoryManager* cudaManager, int level, uint t) { - - if(t>this->tStartAvg) + if(max(int(t) - int(this->tStartAvg), -1) % this->tAvg==0) { SPtr<ProbeStruct> probeStruct = this->getProbeStruct(level); - this->calculateQuantities(probeStruct, para, level); - probeStruct->vals++; + this->calculateQuantities(probeStruct, para, t, level); + if(t>=this->tStartTmpAveraging) probeStruct->vals++; + } - if(max(int(t) - int(this->tStartOut), -1) % this->tOut == 0) - { + if(max(int(t) - int(this->tStartOut), -1) % this->tOut == 0) + { + if(this->hasDeviceQuantityArray) cudaManager->cudaCopyProbeQuantityArrayDtoH(this, level); - - this->write(para, level, t); - } - + this->write(para, level, t); } } void Probe::free(Parameter* para, CudaMemoryManager* cudaManager) { for(int level=0; level<=para->getMaxLevel(); level++) - { - cudaManager->cudaFreeProbeDistances(this, level); + { + if(this->probeParams[level]->hasDistances) + cudaManager->cudaFreeProbeDistances(this, level); cudaManager->cudaFreeProbeIndices(this, level); cudaManager->cudaFreeProbeQuantityArray(this, level); cudaManager->cudaFreeProbeQuantitiesAndOffsets(this, level); } } -void Probe::addPostProcessingVariable(PostProcessingVariable 
variable) +void Probe::addStatistic(Statistic variable) { + assert(this->isAvailableStatistic(variable)); + this->quantities[int(variable)] = true; switch(variable) { - case PostProcessingVariable::Variances: - this->addPostProcessingVariable(PostProcessingVariable::Means); break; + case Statistic::Variances: + this->addStatistic(Statistic::Means); break; + default: break; } } +void Probe::addAllAvailableStatistics() +{ + for( int var=0; var < int(Statistic::LAST); var++) + { + if(this->isAvailableStatistic(static_cast<Statistic>(var))) + this->addStatistic(static_cast<Statistic>(var)); + } +} + void Probe::write(Parameter* para, int level, int t) { + int t_write = this->fileNameLU ? t: t/this->tOut; + const uint numberOfParts = this->getProbeStruct(level)->nPoints / para->getlimitOfNodesForVTK() + 1; std::vector<std::string> fnames; @@ -286,21 +336,22 @@ void Probe::write(Parameter* para, int level, int t) { std::string fname = this->probeName + "_bin_lev_" + StringUtil::toString<int>(level) + "_ID_" + StringUtil::toString<int>(para->getMyID()) - + "_Part_" + StringUtil::toString<int>(i) - + "_t_" + StringUtil::toString<int>(t) - + ".vtk"; + + "_Part_" + StringUtil::toString<int>(i); + if(!this->outputTimeSeries) fname += "_t_" + StringUtil::toString<int>(t_write); + fname += ".vtk"; fnames.push_back(fname); this->fileNamesForCollectionFile.push_back(fname); } this->writeGridFiles(para, level, fnames, t); - if(level == 0) this->writeCollectionFile(para, t); + if(level == 0 && !this->outputTimeSeries) this->writeCollectionFile(para, t); } void Probe::writeCollectionFile(Parameter* para, int t) { + int t_write = this->fileNameLU ? 
t: t/this->tOut; std::string filename = this->probeName + "_bin_ID_" + StringUtil::toString<int>(para->getMyID()) - + "_t_" + StringUtil::toString<int>(t) + + "_t_" + StringUtil::toString<int>(t_write) + ".vtk"; std::ofstream file; @@ -314,7 +365,7 @@ void Probe::writeCollectionFile(Parameter* para, int t) file << " <PPointData>" << std::endl; - for(std::string varName: this->getVarNames()) + for(std::string varName: this->getVarNames()) //TODO { file << " <DataArray type=\"Float64\" Name=\""<< varName << "\" /> " << std::endl; } @@ -355,7 +406,8 @@ void Probe::writeGridFiles(Parameter* para, int level, std::vector<std::string>& for (uint part = 0; part < fnames.size(); part++) { startpos = part * para->getlimitOfNodesForVTK(); - sizeOfNodes = min(para->getlimitOfNodesForVTK(), probeStruct->nPoints - startpos); + uint nDataPoints = this->outputTimeSeries? this->tProbe: probeStruct->nPoints; + sizeOfNodes = min(para->getlimitOfNodesForVTK(), nDataPoints - startpos); endpos = startpos + sizeOfNodes; ////////////////////////////////////////////////////////////////////////// @@ -370,38 +422,29 @@ void Probe::writeGridFiles(Parameter* para, int level, std::vector<std::string>& for( auto it=nodedata.begin(); it!=nodedata.end(); it++) it->resize(sizeOfNodes); - for( int var=0; var < int(PostProcessingVariable::LAST); var++){ - if(this->quantities[var]) - { - PostProcessingVariable quantity = static_cast<PostProcessingVariable>(var); - real coeff; - uint n_arrs = uint(getPostProcessingVariableNames(quantity).size()); - - switch(quantity) + for( int var=0; var < int(Statistic::LAST); var++){ + if(this->quantities[var]) { - case PostProcessingVariable::Instantaneous: - coeff = para->getVelocityRatio(); - break; - case PostProcessingVariable::Means: - coeff = para->getVelocityRatio(); - break; - case PostProcessingVariable::Variances: - coeff = pow(para->getVelocityRatio(),2); - break; - default: break; - } + Statistic statistic = static_cast<Statistic>(var); + real coeff; - 
uint arrOff = probeStruct->arrayOffsetsH[var]; - uint arrLen = probeStruct->nPoints; + std::vector<PostProcessingVariable> postProcessingVariables = this->getPostProcessingVariables(statistic); + uint n_arrs = uint(postProcessingVariables.size()); - for(uint arr=0; arr<n_arrs; arr++) - { - for (uint pos = startpos; pos < endpos; pos++) + uint arrOff = probeStruct->arrayOffsetsH[var]; + uint arrLen = probeStruct->nPoints; + + for(uint arr=0; arr<n_arrs; arr++) { - nodedata[arrOff+arr][pos-startpos] = double(probeStruct->quantitiesArrayH[(arrOff+arr)*arrLen+pos]*coeff); + coeff = postProcessingVariables[arr].conversionFactor; + + for (uint pos = startpos; pos < endpos; pos++) + { + nodedata[arrOff+arr][pos-startpos] = double(probeStruct->quantitiesArrayH[(arrOff+arr)*arrLen+pos]*coeff); + } } } - }} + } WbWriterVtkXmlBinary::getInstance()->writeNodesWithNodeData(this->outputPath + "/" + fnames[part], nodes, nodedatanames, nodedata); } } @@ -409,11 +452,14 @@ void Probe::writeGridFiles(Parameter* para, int level, std::vector<std::string>& std::vector<std::string> Probe::getVarNames() { std::vector<std::string> varNames; - for( int var=0; var < int(PostProcessingVariable::LAST); var++){ - if(this->quantities[var]) + for( int statistic=0; statistic < int(Statistic::LAST); statistic++) { - std::vector<std::string> names = getPostProcessingVariableNames(static_cast<PostProcessingVariable>(var)); - varNames.insert(varNames.end(), names.begin(), names.end()); - }} + if(this->quantities[statistic]) + { + std::vector<PostProcessingVariable> postProcessingVariables = this->getPostProcessingVariables(static_cast<Statistic>(statistic)); + for(int i = 0; i<postProcessingVariables.size(); i++) + varNames.push_back(postProcessingVariables[i].name); + } + } return varNames; -} +} \ No newline at end of file diff --git a/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/Probe.h b/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/Probe.h index 
988d1817e3b19bbfa5c25ed8d271a105ff433de9..d030d0c7a7344a44933b8114b7cd39c7ade3bf30 100644 --- a/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/Probe.h +++ b/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/Probe.h @@ -1,3 +1,45 @@ +//======================================================================================= +// ____ ____ __ ______ __________ __ __ __ __ +// \ \ | | | | | _ \ |___ ___| | | | | / \ | | +// \ \ | | | | | |_) | | | | | | | / \ | | +// \ \ | | | | | _ / | | | | | | / /\ \ | | +// \ \ | | | | | | \ \ | | | \__/ | / ____ \ | |____ +// \ \ | | |__| |__| \__\ |__| \________/ /__/ \__\ |_______| +// \ \ | | ________________________________________________________________ +// \ \ | | | ______________________________________________________________| +// \ \| | | | __ __ __ __ ______ _______ +// \ | | |_____ | | | | | | | | | _ \ / _____) +// \ | | _____| | | | | | | | | | | \ \ \_______ +// \ | | | | |_____ | \_/ | | | | |_/ / _____ | +// \ _____| |__| |________| \_______/ |__| |______/ (_______/ +// +// This file is part of VirtualFluids. VirtualFluids is free software: you can +// redistribute it and/or modify it under the terms of the GNU General Public +// License as published by the Free Software Foundation, either version 3 of +// the License, or (at your option) any later version. +// +// VirtualFluids is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// for more details. +// +// You should have received a copy of the GNU General Public License along +// with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>. +// +//! \file Probe.h +//! \author Henry Korb, Henrik Asmuth +//! \date 13/05/2022 +//! \brief Base class for probes called in UpdateGrid27 +//! +//! 
Any probe should be instantiated in the app and added via para->addProbe( someProbe ) +//! Note that all probes generally require that macroscopic variables have been updated in the +//! time step they are called in. Most collision kernels (atm, all except TurbulentViscosityCumulantK17CompChim ) +//! don't do this and would require an explicit call of calcMacroscopicQuantities. It does seem quite +//! inexpensive though to simply save vx, vy, etc., directly in the collider. +//! +//! \todo might have to adapt conversionFactors when using grid refinement +//======================================================================================= + #ifndef Probe_H #define Probe_H @@ -6,36 +48,80 @@ #include "PreCollisionInteractor/PreCollisionInteractor.h" #include "PointerDefinitions.h" -enum class PostProcessingVariable{ - // HowTo add new PostProcessingVariable: Add enum here, LAST has to stay last - // In interpQuantities add computation of quantity in switch statement - // In writeGridFiles add lb->rw conversion factor - // In getPostProcessingVariableNames add names - // If new quantity depends on other quantities i.e. mean, catch in addPostProcessingVariable +//======================================================================================= +//! \note How to add new Statistics +//! Generally, the Statistic enum refers to the type of statistic to be calculated. +//! Which of these statistics are available depends on the derived probe class. +//! Some types of statistics are only suitable for a certain probe class, others might +//! simply not have been implemented yet. +//! For the same reasons, the quantities (e.g. velocities, rho, etc.) for which these statistics are computed are also probe-specific. +//! The specific quantity (e.g., mean of vx, or variance of rho) is defined as PostProcessingVariable in getPostProcessingVariables of each respective probe. +//! 
PostProcessingVariable also holds the name and conversionFactor of the quantity, both of which are required when writing the data to file +//! +//! To add new Statistics: +//! 1. Add the enum value here; LAST has to stay last +//! 2. For PointProbe and PlaneProbe: add the computation of the statistic to the switch statement in calculatePointwiseQuantities. +//! 3. For PlanarAverageProbe and WallModelProbe: add the computation directly in calculateQuantities. +//! 4. In getPostProcessingVariables add the statistic to the switch statement and add the corresponding PostProcessingVariables +//! 5. Add the Statistic to isAvailableStatistic of the respective probe +//! +//! When adding new quantities to existing statistics (e.g., add rho to PlanarAverageProbe which currently only computes stats of velocity) only do steps 2 to 4 +//! + +enum class Statistic{ + // Variables currently available in Point and Plane probe (all temporal pointwise statistics) Instantaneous, Means, Variances, + + // Variables available in PlanarAverage probe and (partially) in WallModelProbe + // Spatial statistics are typically computed across fixed spatial subdomains, e.g. 
a plane of constant height + // Spatio-temporal statistics additionally average the spatial stats in time + SpatialMeans, + SpatioTemporalMeans, + SpatialCovariances, + SpatioTemporalCovariances, + SpatialSkewness, + SpatioTemporalSkewness, + SpatialFlatness, + SpatioTemporalFlatness, LAST, }; +typedef struct PostProcessingVariable{ + std::string name; + real conversionFactor; + PostProcessingVariable( std::string _name, + real _conversionFactor): + name(_name), conversionFactor(_conversionFactor){}; +} PostProcessingVariable; + struct ProbeStruct{ - uint nPoints, nArrays, vals; + uint nPoints, nIndices, nArrays, vals; uint *pointIndicesH, *pointIndicesD; real *pointCoordsX, *pointCoordsY, *pointCoordsZ; + bool hasDistances=false; real *distXH, *distYH, *distZH, *distXD, *distYD, *distZD; real *quantitiesArrayH, *quantitiesArrayD; bool *quantitiesH, *quantitiesD; uint *arrayOffsetsH, *arrayOffsetsD; }; -__global__ void interpQuantities( uint* pointIndices, +__global__ void calcQuantitiesKernel( uint* pointIndices, + uint nPoints, uint n, + real* vx, real* vy, real* vz, real* rho, + uint* neighborX, uint* neighborY, uint* neighborZ, + bool* quantities, + uint* quantityArrayOffsets, real* quantityArray + ); + +__global__ void interpAndCalcQuantitiesKernel( uint* pointIndices, uint nPoints, uint n, real* distX, real* distY, real* distZ, real* vx, real* vy, real* vz, real* rho, uint* neighborX, uint* neighborY, uint* neighborZ, bool* quantities, - uint* quantityArrayOffsets, real* quantityArray, - bool interpolate + uint* quantityArrayOffsets, real* quantityArray ); @@ -46,13 +132,21 @@ public: const std::string _probeName, const std::string _outputPath, uint _tStartAvg, + uint _tStartTmpAvg, + uint _tAvg, uint _tStartOut, - uint _tOut + uint _tOut, + bool _hasDeviceQuantityArray, + bool _outputTimeSeries ): probeName(_probeName), outputPath(_outputPath), tStartAvg(_tStartAvg), + tStartTmpAveraging(_tStartTmpAvg), + tAvg(_tAvg), tStartOut(_tStartOut), tOut(_tOut), + 
hasDeviceQuantityArray(_hasDeviceQuantityArray), + outputTimeSeries(_outputTimeSeries), PreCollisionInteractor() { assert("Output starts before averaging!" && tStartOut>=tStartAvg); @@ -64,9 +158,20 @@ public: SPtr<ProbeStruct> getProbeStruct(int level){ return this->probeParams[level]; } - void addPostProcessingVariable(PostProcessingVariable _variable); + void addStatistic(Statistic _variable); + void addAllAvailableStatistics(); + + bool getHasDeviceQuantityArray(); + uint getTStartTmpAveraging(){return this->tStartTmpAveraging;} + + void setFileNameToNOut(){this->fileNameLU = false;} + void setTStartTmpAveraging(uint _tStartTmpAveraging){this->tStartTmpAveraging = _tStartTmpAveraging;} private: + virtual bool isAvailableStatistic(Statistic _variable) = 0; + + virtual std::vector<PostProcessingVariable> getPostProcessingVariables(Statistic variable) = 0; + virtual void findPoints(Parameter* para, GridProvider* gridProvider, std::vector<int>& probeIndices_level, std::vector<real>& distX_level, std::vector<real>& distY_level, std::vector<real>& distZ_level, std::vector<real>& pointCoordsX_level, std::vector<real>& pointCoordsY_level, std::vector<real>& pointCoordsZ_level, @@ -75,7 +180,7 @@ private: std::vector<real>& distX, std::vector<real>& distY, std::vector<real>& distZ, std::vector<real>& pointCoordsX, std::vector<real>& pointCoordsY, std::vector<real>& pointCoordsZ, int level); - virtual void calculateQuantities(SPtr<ProbeStruct> probeStruct, Parameter* para, int level) = 0; + virtual void calculateQuantities(SPtr<ProbeStruct> probeStruct, Parameter* para, uint t, int level) = 0; void write(Parameter* para, int level, int t); void writeCollectionFile(Parameter* para, int t); @@ -87,13 +192,28 @@ private: const std::string outputPath; std::vector<SPtr<ProbeStruct>> probeParams; - bool quantities[int(PostProcessingVariable::LAST)] = {}; + bool quantities[int(Statistic::LAST)] = {}; + bool hasDeviceQuantityArray; //!< flag initiating memCopy in Point and 
PlaneProbe. Other probes are only based on thrust reduce functions and therefore don't need explicit memCopy in interact() + bool outputTimeSeries; //!< flag initiating overwrite of output vtk files, skipping collection files and limiting the length of the written data to the current time step (currently only used for WallModelProbe) std::vector<std::string> fileNamesForCollectionFile; std::vector<std::string> varNames; + bool fileNameLU = true; //!< if true, written file name contains time step in LU, else is the number of the written probe files + +protected: uint tStartAvg; + uint tStartTmpAveraging; //!< only non-zero in PlanarAverageProbe and WallModelProbe to switch on Spatio-temporal averaging (while only doing spatial averaging for t<tStartTmpAveraging) + uint tAvg; uint tStartOut; uint tOut; + + uint tProbe = 0; //!< counter for number of probe evaluations. Only used when outputting timeseries + + real velocityRatio; + real densityRatio; + real forceRatio; + real stressRatio; + real accelerationRatio; }; #endif \ No newline at end of file diff --git a/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/WallModelProbe.cu b/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/WallModelProbe.cu new file mode 100644 index 0000000000000000000000000000000000000000..15327beef059f298ec7dacc663f4f986fb577c5a --- /dev/null +++ b/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/WallModelProbe.cu @@ -0,0 +1,300 @@ +#include "Probe.h" +#include "WallModelProbe.h" + +#include <cuda/CudaGrid.h> + +#include <cuda.h> +#include <cuda_runtime.h> +#include <helper_cuda.h> + +#include <thrust/device_vector.h> +#include <thrust/reduce.h> +#include <thrust/device_ptr.h> +#include <thrust/inner_product.h> + +#include "Parameter/Parameter.h" +#include "DataStructureInitializer/GridProvider.h" +#include "GPU/CudaMemoryManager.h" + + +/////////////////////////////////////////////////////////////////////////////////// +/// Functors for thrust reductions 
+/////////////////////////////////////////////////////////////////////////////////// + +template<typename T> +struct pow2 : public thrust::unary_function<T,T> +{ + __host__ __device__ T operator()(const T &x) const + { + return x * x; + } +}; + +template<typename T> +struct pow3 : public thrust::unary_function<T,T> +{ + __host__ __device__ T operator()(const T &x) const + { + return x * x * x; + } +}; + +template<typename T> +struct pow4 : public thrust::unary_function<T,T> +{ + __host__ __device__ T operator()(const T &x) const + { + return x * x * x * x; + } +}; + +struct nth_moment +{ + const float mean; + const int n; + + nth_moment(float _mean, int _n) : mean(_mean), n(_n) {} + + __host__ __device__ + float operator()(const float& x) const { + + real fluctuation = x-mean; + real moment = fluctuation; + for(int i = 1; i<n; i++) moment *= fluctuation; + + return moment; + } +}; + + +/////////////////////////////////////////////////////////////////////////////////// +bool WallModelProbe::isAvailableStatistic(Statistic _variable) +{ + bool isAvailable; + + switch (_variable) + { + case Statistic::Instantaneous: + case Statistic::Means: + case Statistic::Variances: + isAvailable = false; + break; + case Statistic::SpatialMeans: + case Statistic::SpatioTemporalMeans: + isAvailable = true; + break; + case Statistic::SpatialCovariances: + case Statistic::SpatioTemporalCovariances: + case Statistic::SpatialSkewness: + case Statistic::SpatioTemporalSkewness: + case Statistic::SpatialFlatness: + case Statistic::SpatioTemporalFlatness: + isAvailable = false; + break; + default: + isAvailable = false; + } + return isAvailable; +} + +/////////////////////////////////////////////////////////////////////////////////// + +std::vector<PostProcessingVariable> WallModelProbe::getPostProcessingVariables(Statistic statistic) +{ + std::vector<PostProcessingVariable> postProcessingVariables; + switch (statistic) + { + case Statistic::SpatialMeans: + postProcessingVariables.push_back( 
PostProcessingVariable("vx_el_spatMean", this->velocityRatio) ); + postProcessingVariables.push_back( PostProcessingVariable("vy_el_spatMean", this->velocityRatio) ); + postProcessingVariables.push_back( PostProcessingVariable("vz_el_spatMean", this->velocityRatio) ); + postProcessingVariables.push_back( PostProcessingVariable("vx1_spatMean", this->velocityRatio) ); + postProcessingVariables.push_back( PostProcessingVariable("vy1_spatMean", this->velocityRatio) ); + postProcessingVariables.push_back( PostProcessingVariable("vz1_spatMean", this->velocityRatio) ); + postProcessingVariables.push_back( PostProcessingVariable("u_star_spatMean", this->velocityRatio) ); + postProcessingVariables.push_back( PostProcessingVariable("Fx_spatMean", this->outputStress? this->stressRatio: this->forceRatio) ); + postProcessingVariables.push_back( PostProcessingVariable("Fy_spatMean", this->outputStress? this->stressRatio: this->forceRatio) ); + postProcessingVariables.push_back( PostProcessingVariable("Fz_spatMean", this->outputStress? 
this->stressRatio: this->forceRatio) ); + if(this->evaluatePressureGradient) + { + postProcessingVariables.push_back( PostProcessingVariable("dpdx_spatMean", this->accelerationRatio) ); + postProcessingVariables.push_back( PostProcessingVariable("dpdy_spatMean", this->accelerationRatio) ); + postProcessingVariables.push_back( PostProcessingVariable("dpdz_spatMean", this->accelerationRatio) ); + } + break; + case Statistic::SpatioTemporalMeans: + postProcessingVariables.push_back( PostProcessingVariable("vx_el_spatTmpMean", this->velocityRatio) ); + postProcessingVariables.push_back( PostProcessingVariable("vy_el_spatTmpMean", this->velocityRatio) ); + postProcessingVariables.push_back( PostProcessingVariable("vz_el_spatTmpMean", this->velocityRatio) ); + postProcessingVariables.push_back( PostProcessingVariable("vx1_spatTmpMean", this->velocityRatio) ); + postProcessingVariables.push_back( PostProcessingVariable("vy1_spatTmpMean", this->velocityRatio) ); + postProcessingVariables.push_back( PostProcessingVariable("vz1_spatTmpMean", this->velocityRatio) ); + postProcessingVariables.push_back( PostProcessingVariable("u_star_spatTmpMean", this->velocityRatio) ); + postProcessingVariables.push_back( PostProcessingVariable("Fx_spatTmpMean", this->outputStress? this->stressRatio: this->forceRatio) ); + postProcessingVariables.push_back( PostProcessingVariable("Fy_spatTmpMean", this->outputStress? this->stressRatio: this->forceRatio) ); + postProcessingVariables.push_back( PostProcessingVariable("Fz_spatTmpMean", this->outputStress? 
this->stressRatio: this->forceRatio) ); + if(this->evaluatePressureGradient) + { + postProcessingVariables.push_back( PostProcessingVariable("dpdx_spatTmpMean", this->accelerationRatio) ); + postProcessingVariables.push_back( PostProcessingVariable("dpdy_spatTmpMean", this->accelerationRatio) ); + postProcessingVariables.push_back( PostProcessingVariable("dpdz_spatTmpMean", this->accelerationRatio) ); + } + break; + + default: + printf("Statistic unavailable in WallModelProbe\n"); + assert(false); + break; + } + return postProcessingVariables; +} + +/////////////////////////////////////////////////////////////////////////////////// + +void WallModelProbe::findPoints(Parameter* para, GridProvider* gridProvider, std::vector<int>& probeIndices_level, + std::vector<real>& distX_level, std::vector<real>& distY_level, std::vector<real>& distZ_level, + std::vector<real>& pointCoordsX_level, std::vector<real>& pointCoordsY_level, std::vector<real>& pointCoordsZ_level, + int level) +{ + assert( para->getParD(level)->kStressQ > 0 && para->gethasWallModelMonitor() ); + + real dt = para->getTimeRatio(); + uint nt = uint((para->getTEnd()-this->tStartAvg)/this->tAvg); + + for(uint t=0; t<nt; t++) + { + pointCoordsX_level.push_back(dt*(t*this->tAvg)+this->tStartAvg); // x coord will serve as time in this probe. NOTE(review): tStartAvg is added outside the dt factor, i.e. 
this evaluates to dt*t*tAvg + tStartAvg; if a time in physical units is intended, dt*(t*tAvg + tStartAvg) may be meant - confirm + pointCoordsY_level.push_back(0); + pointCoordsZ_level.push_back(0); + } + + if(this->evaluatePressureGradient) + { + assert(para->getIsBodyForce()); + // Find all fluid nodes (the scan starts at index 1) and store their indices as probe indices + for(uint j=1; j<para->getParH(level)->size_Mat_SP; j++ ) + { + if( para->getParH(level)->geoSP[j] == GEO_FLUID) + { + probeIndices_level.push_back(j); + } + } + } +} + +/////////////////////////////////////////////////////////////////////////////////// + +void WallModelProbe::calculateQuantities(SPtr<ProbeStruct> probeStruct, Parameter* para, uint t, int level) +{ + bool doTmpAveraging = (t>this->getTStartTmpAveraging()); + + // Pointer casts to use device arrays in thrust reductions + 
thrust::device_ptr<real> u_el_thrust = thrust::device_pointer_cast(para->getParD(level)->QStress.Vx); + thrust::device_ptr<real> v_el_thrust = thrust::device_pointer_cast(para->getParD(level)->QStress.Vy); + thrust::device_ptr<real> w_el_thrust = thrust::device_pointer_cast(para->getParD(level)->QStress.Vz); + thrust::device_ptr<real> u1_thrust = thrust::device_pointer_cast(para->getParD(level)->QStress.Vx1); + thrust::device_ptr<real> v1_thrust = thrust::device_pointer_cast(para->getParD(level)->QStress.Vy1); + thrust::device_ptr<real> w1_thrust = thrust::device_pointer_cast(para->getParD(level)->QStress.Vz1); + thrust::device_ptr<real> u_star_thrust = thrust::device_pointer_cast(para->getParD(level)->wallModel.u_star); + thrust::device_ptr<real> Fx_thrust = thrust::device_pointer_cast(para->getParD(level)->wallModel.Fx); + thrust::device_ptr<real> Fy_thrust = thrust::device_pointer_cast(para->getParD(level)->wallModel.Fy); + thrust::device_ptr<real> Fz_thrust = thrust::device_pointer_cast(para->getParD(level)->wallModel.Fz); + thrust::device_ptr<real> dpdx_thrust = thrust::device_pointer_cast(para->getParD(level)->forceX_SP); + thrust::device_ptr<real> dpdy_thrust = thrust::device_pointer_cast(para->getParD(level)->forceY_SP); + thrust::device_ptr<real> dpdz_thrust = thrust::device_pointer_cast(para->getParD(level)->forceZ_SP); + + thrust::device_ptr<uint> indices_thrust = thrust::device_pointer_cast(probeStruct->pointIndicesD); + typedef thrust::device_vector<real>::iterator valIterator; + typedef thrust::device_vector<uint>::iterator indIterator; + thrust::permutation_iterator<valIterator, indIterator> dpdx_iter_begin(dpdx_thrust, indices_thrust); + thrust::permutation_iterator<valIterator, indIterator> dpdx_iter_end (dpdx_thrust, indices_thrust+probeStruct->nIndices); + thrust::permutation_iterator<valIterator, indIterator> dpdy_iter_begin(dpdy_thrust, indices_thrust); + thrust::permutation_iterator<valIterator, indIterator> dpdy_iter_end (dpdy_thrust, 
indices_thrust+probeStruct->nIndices); + thrust::permutation_iterator<valIterator, indIterator> dpdz_iter_begin(dpdz_thrust, indices_thrust); + thrust::permutation_iterator<valIterator, indIterator> dpdz_iter_end (dpdz_thrust, indices_thrust+probeStruct->nIndices); + + real N = para->getParD(level)->kStressQ; + real n = (real)probeStruct->vals; + int nPoints = probeStruct->nPoints; + + if(probeStruct->quantitiesH[int(Statistic::SpatialMeans)]) + { + // Compute the instantaneous spatial means of the velocity moments + real spatMean_u_el = thrust::reduce(u_el_thrust, u_el_thrust+N)/N; + real spatMean_v_el = thrust::reduce(v_el_thrust, v_el_thrust+N)/N; + real spatMean_w_el = thrust::reduce(w_el_thrust, w_el_thrust+N)/N; + real spatMean_u1 = thrust::reduce(u1_thrust, u1_thrust+N)/N; + real spatMean_v1 = thrust::reduce(v1_thrust, v1_thrust+N)/N; + real spatMean_w1 = thrust::reduce(w1_thrust, w1_thrust+N)/N; + real spatMean_u_star = thrust::reduce(u_star_thrust, u_star_thrust+N)/N; + real spatMean_Fx = thrust::reduce(Fx_thrust, Fx_thrust+N)/N; + real spatMean_Fy = thrust::reduce(Fy_thrust, Fy_thrust+N)/N; + real spatMean_Fz = thrust::reduce(Fz_thrust, Fz_thrust+N)/N; + + uint arrOff = probeStruct->arrayOffsetsH[int(Statistic::SpatialMeans)]; + probeStruct->quantitiesArrayH[(arrOff+0)*nPoints+tProbe] = spatMean_u_el; + probeStruct->quantitiesArrayH[(arrOff+1)*nPoints+tProbe] = spatMean_v_el; + probeStruct->quantitiesArrayH[(arrOff+2)*nPoints+tProbe] = spatMean_w_el; + probeStruct->quantitiesArrayH[(arrOff+3)*nPoints+tProbe] = spatMean_u1; + probeStruct->quantitiesArrayH[(arrOff+4)*nPoints+tProbe] = spatMean_v1; + probeStruct->quantitiesArrayH[(arrOff+5)*nPoints+tProbe] = spatMean_w1; + probeStruct->quantitiesArrayH[(arrOff+6)*nPoints+tProbe] = spatMean_u_star; + probeStruct->quantitiesArrayH[(arrOff+7)*nPoints+tProbe] = spatMean_Fx; + probeStruct->quantitiesArrayH[(arrOff+8)*nPoints+tProbe] = spatMean_Fy; + probeStruct->quantitiesArrayH[(arrOff+9)*nPoints+tProbe] = 
spatMean_Fz; + + real spatMean_dpdx; + real spatMean_dpdy; + real spatMean_dpdz; + if(this->evaluatePressureGradient) + { + real N_fluid = (real)probeStruct->nIndices; + spatMean_dpdx = thrust::reduce(dpdx_iter_begin, dpdx_iter_end)/N_fluid; + spatMean_dpdy = thrust::reduce(dpdy_iter_begin, dpdy_iter_end)/N_fluid; + spatMean_dpdz = thrust::reduce(dpdz_iter_begin, dpdz_iter_end)/N_fluid; + probeStruct->quantitiesArrayH[(arrOff+10)*nPoints+tProbe] = spatMean_dpdx; + probeStruct->quantitiesArrayH[(arrOff+11)*nPoints+tProbe] = spatMean_dpdy; + probeStruct->quantitiesArrayH[(arrOff+12)*nPoints+tProbe] = spatMean_dpdz; + } + + if(probeStruct->quantitiesH[int(Statistic::SpatioTemporalMeans)] && doTmpAveraging) + { + uint arrOff = probeStruct->arrayOffsetsH[int(Statistic::SpatioTemporalMeans)]; + real spatMean_u_el_old = probeStruct->quantitiesArrayH[(arrOff+0)*nPoints+tProbe-1]; + real spatMean_v_el_old = probeStruct->quantitiesArrayH[(arrOff+1)*nPoints+tProbe-1]; + real spatMean_w_el_old = probeStruct->quantitiesArrayH[(arrOff+2)*nPoints+tProbe-1]; + real spatMean_u1_old = probeStruct->quantitiesArrayH[(arrOff+3)*nPoints+tProbe-1]; + real spatMean_v1_old = probeStruct->quantitiesArrayH[(arrOff+4)*nPoints+tProbe-1]; + real spatMean_w1_old = probeStruct->quantitiesArrayH[(arrOff+5)*nPoints+tProbe-1]; + real spatMean_u_star_old = probeStruct->quantitiesArrayH[(arrOff+6)*nPoints+tProbe-1]; + real spatMean_Fx_old = probeStruct->quantitiesArrayH[(arrOff+7)*nPoints+tProbe-1]; + real spatMean_Fy_old = probeStruct->quantitiesArrayH[(arrOff+8)*nPoints+tProbe-1]; + real spatMean_Fz_old = probeStruct->quantitiesArrayH[(arrOff+9)*nPoints+tProbe-1]; + + probeStruct->quantitiesArrayH[(arrOff+0)*nPoints+tProbe] = spatMean_u_el_old + (spatMean_u_el-spatMean_u_el_old)/n; + probeStruct->quantitiesArrayH[(arrOff+1)*nPoints+tProbe] = spatMean_v_el_old + (spatMean_v_el-spatMean_v_el_old)/n; + probeStruct->quantitiesArrayH[(arrOff+2)*nPoints+tProbe] = spatMean_w_el_old + 
(spatMean_w_el-spatMean_w_el_old)/n; + probeStruct->quantitiesArrayH[(arrOff+3)*nPoints+tProbe] = spatMean_u1_old + (spatMean_u1-spatMean_u1_old)/n; + probeStruct->quantitiesArrayH[(arrOff+4)*nPoints+tProbe] = spatMean_v1_old + (spatMean_v1-spatMean_v1_old)/n; + probeStruct->quantitiesArrayH[(arrOff+5)*nPoints+tProbe] = spatMean_w1_old + (spatMean_w1-spatMean_w1_old)/n; + probeStruct->quantitiesArrayH[(arrOff+6)*nPoints+tProbe] = spatMean_u_star_old +(spatMean_u_star-spatMean_u_star_old)/n; + probeStruct->quantitiesArrayH[(arrOff+7)*nPoints+tProbe] = spatMean_Fx_old + (spatMean_Fx-spatMean_Fx_old)/n; + probeStruct->quantitiesArrayH[(arrOff+8)*nPoints+tProbe] = spatMean_Fy_old + (spatMean_Fy-spatMean_Fy_old)/n; + probeStruct->quantitiesArrayH[(arrOff+9)*nPoints+tProbe] = spatMean_Fz_old + (spatMean_Fz-spatMean_Fz_old)/n; + + if(this->evaluatePressureGradient) + { + real spatMean_dpdx_old = probeStruct->quantitiesArrayH[(arrOff+10)*nPoints+tProbe-1]; + real spatMean_dpdy_old = probeStruct->quantitiesArrayH[(arrOff+11)*nPoints+tProbe-1]; + real spatMean_dpdz_old = probeStruct->quantitiesArrayH[(arrOff+12)*nPoints+tProbe-1]; + probeStruct->quantitiesArrayH[(arrOff+10)*nPoints+tProbe] = spatMean_dpdx_old + (spatMean_dpdx-spatMean_dpdx_old)/n; + probeStruct->quantitiesArrayH[(arrOff+11)*nPoints+tProbe] = spatMean_dpdy_old + (spatMean_dpdy-spatMean_dpdy_old)/n; + probeStruct->quantitiesArrayH[(arrOff+12)*nPoints+tProbe] = spatMean_dpdz_old + (spatMean_dpdz-spatMean_dpdz_old)/n; + } + } + } + + this->tProbe += 1; + getLastCudaError("WallModelProbe::calculateQuantities execution failed"); +} + diff --git a/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/WallModelProbe.h b/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/WallModelProbe.h new file mode 100644 index 0000000000000000000000000000000000000000..d6464c5ca2aa60310cc6bb7ca0a210bc12e755ff --- /dev/null +++ b/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/WallModelProbe.h @@ -0,0 +1,87 @@ 
+//======================================================================================= +// ____ ____ __ ______ __________ __ __ __ __ +// \ \ | | | | | _ \ |___ ___| | | | | / \ | | +// \ \ | | | | | |_) | | | | | | | / \ | | +// \ \ | | | | | _ / | | | | | | / /\ \ | | +// \ \ | | | | | | \ \ | | | \__/ | / ____ \ | |____ +// \ \ | | |__| |__| \__\ |__| \________/ /__/ \__\ |_______| +// \ \ | | ________________________________________________________________ +// \ \ | | | ______________________________________________________________| +// \ \| | | | __ __ __ __ ______ _______ +// \ | | |_____ | | | | | | | | | _ \ / _____) +// \ | | _____| | | | | | | | | | | \ \ \_______ +// \ | | | | |_____ | \_/ | | | | |_/ / _____ | +// \ _____| |__| |________| \_______/ |__| |______/ (_______/ +// +// This file is part of VirtualFluids. VirtualFluids is free software: you can +// redistribute it and/or modify it under the terms of the GNU General Public +// License as published by the Free Software Foundation, either version 3 of +// the License, or (at your option) any later version. +// +// VirtualFluids is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// for more details. +// +// You should have received a copy of the GNU General Public License along +// with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>. +// +//! \file WallModelProbe.h +//! \author Henrik Asmuth +//! \date 13/05/2022 +//! \brief Probe computing statistics of all relevant wall model quantities used in the StressBC kernels +//! +//! Computes spatial statistics for all grid points of the StressBC +//! The spatial statistics can additionally be averaged in time. +//! 
+//======================================================================================= + +#ifndef WallModelProbe_H +#define WallModelProbe_H + +#include "Probe.h" + +/////////////////////////////////////////////////////////////////////////////////// + +class WallModelProbe : public Probe +{ +public: + WallModelProbe( + const std::string _probeName, + const std::string _outputPath, + uint _tStartAvg, + uint _tStartTmpAvg, + uint _tAvg, + uint _tStartOut, + uint _tOut + ): Probe(_probeName, + _outputPath, + _tStartAvg, + _tStartTmpAvg, + _tAvg, + _tStartOut, + _tOut, + false, + true){} + + + void setForceOutputToStress(bool _outputStress){ this->outputStress = _outputStress; } + void setEvaluatePressureGradient(bool _evalPressGrad){ this->evaluatePressureGradient = _evalPressGrad; } + +private: + bool isAvailableStatistic(Statistic _variable) override; + + std::vector<PostProcessingVariable> getPostProcessingVariables(Statistic variable) override; + + void findPoints(Parameter* para, GridProvider* gridProvider, std::vector<int>& probeIndices_level, + std::vector<real>& distX_level, std::vector<real>& distY_level, std::vector<real>& distZ_level, + std::vector<real>& pointCoordsX_level, std::vector<real>& pointCoordsY_level, std::vector<real>& pointCoordsZ_level, + int level) override; + void calculateQuantities(SPtr<ProbeStruct> probeStruct, Parameter* para, uint t, int level) override; + +private: + bool outputStress = false; //!> if true, output wall force is converted to a stress + bool evaluatePressureGradient = false; //!> if true, mean global pressure gradient will also be evaluated +}; + +#endif \ No newline at end of file