diff --git a/.gitignore b/.gitignore index f87c8efbbd3b3877bd77212d6c2184db2aa409f1..1e33ea527c553631ad2e37051501af109c407b53 100644 --- a/.gitignore +++ b/.gitignore @@ -10,6 +10,7 @@ dist/ *.egg-info/ __pycache__/ .venv/ +pyfluids* # IDE .vscode/ diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index b34c5a8f66c1340670b6acd80ea6a9901b2760d1..8e3857684f3d9049d87fba227c6833693ec539a7 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -132,7 +132,7 @@ gcc_9_python: - export CCACHE_DIR=$CI_PROJECT_DIR/cache script: - - python3 setup.py bdist_wheel build_ext --build-temp=build + - python3 setup.py bdist_wheel build_ext --build-temp=build -DBUILD_VF_CPU=ON ############################################################################### ## Container Upload ## diff --git a/CMakePresets.json b/CMakePresets.json index 0f360fd303cdcad923b01d56df5c6d48ad62ca2c..c53482ec72109f1a672b97797763d027a6ec80bf 100644 --- a/CMakePresets.json +++ b/CMakePresets.json @@ -9,26 +9,29 @@ { "name": "default", "hidden": true, - "binaryDir": "${sourceDir}/build/", - "cacheVariables": { - "BUILD_VF_UNIT_TESTS": "ON" - } + "binaryDir": "${sourceDir}/build/" }, { - "name": "default_make", - "inherits": "default", + "name": "msvc", + "hidden": true, + "generator": "Visual Studio 16 2019", + "architecture": "x64" + }, + { + "name": "make", "hidden": true, "generator": "Unix Makefiles" }, { - "name": "default_msvc", - "inherits": "default", + "name": "unit_tests", "hidden": true, - "generator": "Visual Studio 16 2019", - "architecture": "x64" + "cacheVariables": { + "BUILD_VF_UNIT_TESTS": "ON" + } }, { - "name": "default_cpu", + "name": "cpu", + "inherits": "default", "hidden": true, "description": "CPU build of VirtualFluids", "cacheVariables": { @@ -37,7 +40,8 @@ } }, { - "name": "default_gpu", + "name": "gpu", + "inherits": "default", "hidden": true, "description": "GPU build of VirtualFluids", "cacheVariables": { @@ -46,92 +50,213 @@ } }, { - "name": "default_gpu_numerical_tests", - "inherits": [ - 
"default_gpu" - ], + "name": "debug", "hidden": true, - "description": "GPU numerical tests of VirtualFluids", "cacheVariables": { - "BUILD_VF_DOUBLE_ACCURACY": "ON", - "BUILD_NUMERIC_TESTS": "ON" + "CMAKE_BUILD_TYPE": "Debug" } }, { - "name": "default_all", + "name": "release", "hidden": true, - "description": "All build of VirtualFluids", - "inherits": [ - "default_cpu", - "default_gpu" - ], "cacheVariables": { - "BUILD_VF_DOUBLE_ACCURACY": "ON" + "CMAKE_BUILD_TYPE": "Release" } }, { - "name": "cpu_make", - "inherits": [ - "default_make", - "default_cpu" - ], - "displayName": "cpu make configuration" - }, - { - "name": "cpu_msvc", - "inherits": [ - "default_msvc", - "default_cpu" - ], - "displayName": "cpu msvc configuration" + "name": "min_size_rel", + "hidden": true, + "cacheVariables": { + "CMAKE_BUILD_TYPE": "MinSizeRel" + } }, { - "name": "gpu_make", - "inherits": [ - "default_make", - "default_gpu" - ], - "displayName": "gpu make configuration" + "name": "rel_with_deb_info", + "hidden": true, + "cacheVariables": { + "CMAKE_BUILD_TYPE": "RelWithDebInfo" + } }, { - "name": "gpu_msvc", + "name": "gpu_numerical_tests", "inherits": [ - "default_msvc", - "default_gpu" + "gpu", + "unit_tests" ], - "displayName": "gpu msvc configuration" + "hidden": true, + "description": "GPU numerical tests of VirtualFluids", + "cacheVariables": { + "BUILD_VF_DOUBLE_ACCURACY": "ON", + "BUILD_NUMERIC_TESTS": "ON" + } }, { "name": "all_make", "inherits": [ - "default_make", - "default_all" + "cpu", + "gpu", + "unit_tests", + "make" ], "displayName": "all make configuration" }, { "name": "all_msvc", "inherits": [ - "default_msvc", - "default_all" + "cpu", + "gpu", + "unit_tests", + "msvc" ], "displayName": "all msvc configuration" }, { "name": "gpu_numerical_tests_make", "inherits": [ - "default_make", - "default_gpu_numerical_tests" + "gpu_numerical_tests", + "make" ], "displayName": "gpu numerical tests make configuration" }, { "name": "gpu_numerical_tests_msvc", "inherits": [ - 
"default_msvc", - "default_gpu_numerical_tests" + "msvc", + "gpu_numerical_tests" ], "displayName": "gpu numerical tests msvc configuration" + }, + { + "name": "debug_make_gpu", + "displayName": "Debug GPU Make", + "inherits": [ + "gpu", + "make", + "debug" + ] + }, + { + "name": "release_make_gpu", + "displayName": "Release GPU Make", + "inherits": [ + "gpu", + "make", + "release" + ] + }, + { + "name": "min_size_rel_make_gpu", + "displayName": "MinSizeRel GPU Make", + "inherits": [ + "gpu", + "make", + "min_size_rel" + ] + }, + { + "name": "rel_with_deb_info_make_gpu", + "displayName": "RelWithDebInfo GPU Make", + "inherits": [ + "gpu", + "make", + "rel_with_deb_info" + ] + }, + { + "name": "debug_msvc_gpu", + "displayName": "Debug GPU MSVC", + "inherits": [ + "gpu", + "msvc", + "debug" + ] + }, + { + "name": "release_msvc_gpu", + "displayName": "Release GPU MSVC", + "inherits": [ + "gpu", + "msvc", + "release" + ] + }, + { + "name": "min_size_rel_msvc_gpu", + "displayName": "MinSizeRel GPU MSVC", + "inherits": [ + "gpu", + "msvc", + "min_size_rel" + ] + }, + { + "name": "rel_with_deb_info_msvc_gpu", + "displayName": "RelWithDebInfo GPU MSVC", + "inherits": [ + "gpu", + "msvc", + "rel_with_deb_info" + ] + } + ], + "buildPresets": [ + { + "name": "Default", + "hidden": true, + "configurePreset": "default", + "jobs": 4 + }, + { + "name": "GPU", + "hidden": true, + "configurePreset": "gpu", + "targets": [ + "ActuatorLine", + "DrivenCavity", + "BoundaryLayer" + ], + "inherits": [ + "Default" + ] + }, + { + "name": "Release", + "hidden": true, + "configurePreset": "release" + }, + { + "name": "Debug_Make_GPU", + "displayName": "Debug", + "description": "Compile GPU version with debug information", + "configurePreset": "debug_make_gpu", + "inherits": [ + "GPU" + ] + }, + { + "name": "MinSizeRel_Make_GPU", + "displayName": "MinSizeRel", + "configurePreset": "min_size_rel_make_gpu", + "inherits": [ + "GPU" + ] + }, + { + "name": "RelWithDebInfo_GPU", + "displayName": 
"RelWithDebInfo", + "configurePreset": "rel_with_deb_info_make_gpu", + "inherits": [ + "GPU" + ] + }, + { + "name": "Release_GPU", + "description": "Build release version of GPU", + "displayName": "Release GPU", + "configurePreset": "release_make_gpu", + "inherits": [ + "GPU" + ] } ] -} +} \ No newline at end of file diff --git a/Python/actuator_line/actuator_line.py b/Python/actuator_line/actuator_line.py index 6e3c8608617df1267535984d53307dea9184c6ab..ecd0fe0602bba83275798928fabce9339f20763e 100644 --- a/Python/actuator_line/actuator_line.py +++ b/Python/actuator_line/actuator_line.py @@ -4,20 +4,11 @@ from pathlib import Path from mpi4py import MPI from pyfluids import basics, gpu, logger #%% -reference_diameter = 126 - -length = np.array([29,6,6])*reference_diameter -viscosity = 1.56e-5 -velocity = 9 -mach = 0.1 -nodes_per_diameter = 32 - -sim_name = "ActuatorLine" -config_file = Path(__file__).parent/Path("config.txt") +sim_name = "ABL" +config_file = Path(__file__).parent/"configActuatorLine.txt" output_path = Path(__file__).parent/Path("output") output_path.mkdir(exist_ok=True) -t_out = 100. -t_end = 500. 
+ #%% logger.Logger.initialize_logger() @@ -25,87 +16,169 @@ basics.logger.Logger.add_stdout() basics.logger.Logger.set_debug_level(basics.logger.Level.INFO_LOW) basics.logger.Logger.time_stamp(basics.logger.TimeStamp.ENABLE) basics.logger.Logger.enable_printed_rank_numbers(True) -# %% -comm = gpu.Communicator.get_instance() #%% grid_factory = gpu.grid_generator.GridFactory.make() grid_builder = gpu.grid_generator.MultipleGridBuilder.make_shared(grid_factory) +communicator = gpu.Communicator.get_instance() -#%% -dx = reference_diameter/nodes_per_diameter - -grid_builder.add_coarse_grid(0.0, 0.0, 0.0, *length, dx) -grid_builder.set_periodic_boundary_condition(False, False, False) -grid_builder.build_grids(basics.LbmOrGks.LBM, False) -#%% config = basics.ConfigurationFile() config.load(str(config_file)) + +para = gpu.Parameter(communicator.get_number_of_process(), communicator.get_pid(), config) +bc_factory = gpu.BoundaryConditionFactory() + #%% -para = gpu.Parameter(config, comm.get_number_of_process(), comm.get_pid()) +turbine_diameter = config.get_float_value("turbineDiameter", 126) +boundary_layer_height = config.get_float_value("boundaryLayerHeight", 1000) +z0 = config.get_float_value("z0", 0.1) +u_star = config.get_float_value("u_star", 0.4) + +kappa = config.get_float_value("vonKarmanConstant", 0.4) # von Karman constant + +viscosity = config.get_float_value("viscosity", 1.56e-5) +velocity = 0.5*u_star/kappa*np.log(boundary_layer_height/z0+1) #0.5 times max mean velocity at the top in m/s + +mach = config.get_float_value("Ma", 0.1) +nodes_per_height = config.get_uint_value("nz", 64) + + +turb_pos = np.array([3,3,3])*turbine_diameter +epsilon = config.get_float_value("SmearingWidth", 5) +density = config.get_float_value("Density", 1.225) +level = 0 +n_blades = 3 +n_blade_nodes = config.get_int_value("NumberOfNodesPerAL", 32) + +read_precursor = config.get_bool_value("readPrecursor", False) + +if read_precursor: + nTReadPrecursor = 
config.get_int_value("nTimestepsReadPrecursor") + use_distributions = config.get_bool_value("useDistributions", False) + precursor_directory = config.get_string_value("precursorDirectory") + +# all in s +t_start_out = config.get_float_value("tStartOut") +t_out = config.get_float_value("tOut") +t_end = config.get_float_value("tEnd") # total time of simulation + +t_start_averaging = config.get_float_value("tStartAveraging") +t_start_tmp_averaging = config.get_float_value("tStartTmpAveraging") +t_averaging = config.get_float_value("tAveraging") +t_start_out_probe = config.get_float_value("tStartOutProbe") +t_out_probe = config.get_float_value("tOutProbe") + +#%% +length = np.array([6,4,1])*boundary_layer_height +dx = boundary_layer_height/nodes_per_height dt = dx * mach / (np.sqrt(3) * velocity) -velocity_lb = velocity * dt / dx # LB units -viscosity_lb = viscosity * dt / (dx * dx) # LB units +velocity_ratio = dx/dt +velocity_LB = velocity / velocity_ratio # LB units +viscosity_LB = viscosity / (velocity_ratio * dx) # LB units +pressure_gradient = u_star * u_star / boundary_layer_height +pressure_gradient_LB = pressure_gradient * (dt*dt)/dx + +logger.vf_log_info(f"velocity [dx/dt] = {velocity_LB}") +logger.vf_log_info(f"dt = {dt}") +logger.vf_log_info(f"dx = {dx}") +logger.vf_log_info(f"viscosity [10^8 dx^2/dt] = {viscosity_LB*1e8}") +logger.vf_log_info(f"u* /(dx/dt) = {u_star*dt/dx}") +logger.vf_log_info(f"dpdx = {pressure_gradient}") +logger.vf_log_info(f"dpdx /(dx/dt^2) = {pressure_gradient_LB}") + #%% -para.set_devices([0]) para.set_output_prefix(sim_name) -para.set_output_path(str(output_path)) -para.set_f_name(para.get_output_path() + "/" + para.get_output_prefix()) para.set_print_files(True) -para.set_max_level(1) -#%% -para.set_velocity(velocity_lb) -para.set_viscosity(viscosity_lb) + +para.set_forcing(pressure_gradient_LB, 0, 0) +para.set_velocity_LB(velocity_LB) +para.set_viscosity_LB(viscosity_LB) para.set_velocity_ratio(dx/dt) 
para.set_viscosity_ratio(dx*dx/dt) -para.set_main_kernel("TurbulentViscosityCumulantK17CompChim") -para.set_use_AMD(True) -para.set_SGS_constant(0.083) +para.set_density_ratio(1.0) -def init_func(coord_x, coord_y, coord_z): - return [0.0, velocity_lb, 0.0, 0.0] +para.set_main_kernel("TurbulentViscosityCumulantK17CompChim") -para.set_initial_condition(init_func) -para.set_t_out(int(t_out/dt)) -para.set_t_end(int(t_end/dt)) +para.set_timestep_start_out(int(t_start_out/dt)) +para.set_timestep_out(int(t_out/dt)) +para.set_timestep_end(int(t_end/dt)) para.set_is_body_force(True) - #%% -grid_builder.set_velocity_boundary_condition(gpu.SideType.MX, velocity_lb, 0.0, 0.0) - -grid_builder.set_velocity_boundary_condition(gpu.SideType.MY, velocity_lb, 0.0, 0.0) -grid_builder.set_velocity_boundary_condition(gpu.SideType.PY, velocity_lb, 0.0, 0.0) - -grid_builder.set_velocity_boundary_condition(gpu.SideType.MZ, velocity_lb, 0.0, 0.0) -grid_builder.set_velocity_boundary_condition(gpu.SideType.PZ, velocity_lb, 0.0, 0.0) - -grid_builder.set_pressure_boundary_condition(gpu.SideType.PX, 0.0) +tm_factory = gpu.TurbulenceModelFactory(para) +tm_factory.read_config_file(config) +#%% +grid_builder.add_coarse_grid(0.0, 0.0, 0.0, *length, dx) +grid_builder.set_periodic_boundary_condition(not read_precursor, True, False) +grid_builder.build_grids(basics.LbmOrGks.LBM, False) +sampling_offset = 2 +if read_precursor: + precursor = gpu.create_file_collection(precursor_directory + "/precursor", gpu.FileType.VTK) + grid_builder.set_precursor_boundary_condition(gpu.SideType.MX, precursor, nTReadPrecursor, 0, 0, 0) + +grid_builder.set_stress_boundary_condition(gpu.SideType.MZ, 0, 0, 1, sampling_offset, z0/dx) +para.set_has_wall_monitor(True) +grid_builder.set_slip_boundary_condition(gpu.SideType.PZ, 0, 0, -1) + +if read_precursor: + grid_builder.set_pressure_boundary_condition(gpu.SideType.PX, 0) +bc_factory.set_stress_boundary_condition(gpu.StressBC.StressPressureBounceBack) 
+bc_factory.set_slip_boundary_condition(gpu.SlipBC.SlipBounceBack) +bc_factory.set_pressure_boundary_condition(gpu.PressureBC.OutflowNonReflective) +if read_precursor: + bc_factory.set_precursor_boundary_condition(gpu.PrecursorBC.DistributionsPrecursor if use_distributions else gpu.PrecursorBC.VelocityPrecursor) +para.set_outflow_pressure_correction_factor(0.0); #%% -cuda_memory_manager = gpu.CudaMemoryManager(para) -grid_generator = gpu.GridProvider.make_grid_generator(grid_builder, para, cuda_memory_manager, comm) +# don't use python init functions, they are very slow! Just kept as an example. +# Define lambda in bindings and set it here. +# def init_func(coord_x, coord_y, coord_z): +# return [ +# 0.0, +# (u_star/0.4 * np.log(np.maximum(coord_z,z0)/z0) + 2.0*np.sin(np.pi*16*coord_x/length[0])*np.sin(np.pi*8*coord_z/boundary_layer_height)/(np.square(coord_z/boundary_layer_height)+1)) * dt / dx, +# 2.0*np.sin(np.pi*16.*coord_x/length[0])*np.sin(np.pi*8.*coord_z/boundary_layer_height)/(np.square(coord_z/boundary_layer_height)+1.) * dt / dx, +# 8.0*u_star/0.4*(np.sin(np.pi*8.0*coord_y/boundary_layer_height)*np.sin(np.pi*8.0*coord_z/boundary_layer_height)+np.sin(np.pi*8.0*coord_x/length[0]))/(np.square(length[2]/2.0-coord_z)+1.) 
* dt / dx] +# para.set_initial_condition(init_func) +para.set_initial_condition_perturbed_log_law(u_star, z0, length[0], length[2], boundary_layer_height, velocity_ratio) + #%% -turb_pos = np.array([3,3,3])*reference_diameter +turb_pos = np.array([3,3,3])*turbine_diameter epsilon = 5 density = 1.225 level = 0 n_blades = 3 n_blade_nodes = 32 -alm = gpu.ActuatorLine(n_blades, density, n_blade_nodes, epsilon, *turb_pos, reference_diameter, level, dt, dx) +alm = gpu.ActuatorLine(n_blades, density, n_blade_nodes, epsilon, *turb_pos, turbine_diameter, level, dt, dx, True) para.add_actuator(alm) #%% -point_probe = gpu.probes.PointProbe("pointProbe", str(output_path), 100, 1, 500, 100) -point_probe.add_probe_points_from_list(np.array([1,2,5])*reference_diameter, np.array([3,3,3])*reference_diameter, np.array([3,3,3])*reference_diameter) -point_probe.add_statistic(gpu.probes.Statistic.Means) - -para.add_probe(point_probe) - -plane_probe = gpu.probes.PlaneProbe("planeProbe", str(output_path), 100, 1, 500, 100) -plane_probe.set_probe_plane(5*reference_diameter, 0, 0, dx, length[1], length[2]) -para.add_probe(plane_probe) +planar_average_probe = gpu.probes.PlanarAverageProbe("horizontalPlanes", para.get_output_path(), 0, int(t_start_tmp_averaging/dt), int(t_averaging/dt) , int(t_start_out_probe/dt), int(t_out_probe/dt), 'z') +planar_average_probe.add_all_available_statistics() +planar_average_probe.set_file_name_to_n_out() +para.add_probe(planar_average_probe) #%% -sim = gpu.Simulation(para, cuda_memory_manager, comm, grid_generator) +wall_model_probe = gpu.probes.WallModelProbe("wallModelProbe", para.get_output_path(), 0, int(t_start_tmp_averaging/dt), int(t_averaging/dt/4), int(t_start_out_probe/dt), int(t_out_probe/dt)) +wall_model_probe.add_all_available_statistics() +wall_model_probe.set_file_name_to_n_out() +wall_model_probe.set_force_output_to_stress(True) +if para.get_is_body_force(): + wall_model_probe.set_evaluate_pressure_gradient(True) 
+para.add_probe(wall_model_probe) + +plane_locs = [100,] +if read_precursor: plane_locs.extend([1000, 1500, 2000, 2500, 0]) + +for n_probe, probe_pos in enumerate(plane_locs): + plane_probe = gpu.probes.PlaneProbe(f"planeProbe_{n_probe+1}", para.get_output_path(), int(t_start_averaging/dt), 10, int(t_start_out_probe/dt), int(t_out_probe/dt)) + plane_probe.set_probe_plane(probe_pos, 0, 0, dx, length[1], length[2]) + plane_probe.add_all_available_statistics() + para.add_probe(plane_probe) +#%% +cuda_memory_manager = gpu.CudaMemoryManager(para) +grid_generator = gpu.GridProvider.make_grid_generator(grid_builder, para, cuda_memory_manager, communicator) +#%% +#%% +sim = gpu.Simulation(para, cuda_memory_manager, communicator, grid_generator, bc_factory, tm_factory) #%% sim.run() -MPI.Finalize() \ No newline at end of file +MPI.Finalize() + diff --git a/Python/actuator_line/config.txt b/Python/actuator_line/config.txt deleted file mode 100644 index e4c778c4cc048f54c0a32310e6bf4a7343a263fa..0000000000000000000000000000000000000000 --- a/Python/actuator_line/config.txt +++ /dev/null @@ -1,2 +0,0 @@ -Path = . -GridPath = . diff --git a/Python/actuator_line/configActuatorLine.txt b/Python/actuator_line/configActuatorLine.txt new file mode 100644 index 0000000000000000000000000000000000000000..4b38bc41c1d5b510e7b262423fff861dc1a9c030 --- /dev/null +++ b/Python/actuator_line/configActuatorLine.txt @@ -0,0 +1,39 @@ +################################################## +#informations for Writing +################################################## +Path = . +################################################## +#informations for reading +################################################## +GridPath = . 
+################################################## +Devices = 0 +################################################## +tStartOut = 0 +tOut = 100000 +tEnd = 300000 +################################################## +tStartAveraging = 0 +tStartTmpAveraging = 100000 +tAveraging = 200 +tStartOutProbe = 0 +tOutProbe = 1000 +################################################## +Ma = 0.1 +nz = 96 + +bodyForce = true +SGSconstant = 0.333 +TurbulenceModel = QR + +QuadricLimiterP = 100000.0 +QuadricLimiterM = 100000.0 +QuadricLimiterD = 100000.0 + +################################################## +readPrecursor = false +nTimestepsReadPrecursor = 10 +precursorFile = precursor/Precursor + +################################################## +turbineDiameter = 126.0 diff --git a/Python/boundary_layer/__init__.py b/Python/boundary_layer/__init__.py deleted file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000 diff --git a/Python/boundary_layer/boundary_layer.py b/Python/boundary_layer/boundary_layer.py index 1c01f50946b49bc0ddab7e50065a24aab4ae869f..269ac887440d93a64bc26e2117926a59371874d2 100644 --- a/Python/boundary_layer/boundary_layer.py +++ b/Python/boundary_layer/boundary_layer.py @@ -4,34 +4,11 @@ from pathlib import Path from mpi4py import MPI from pyfluids import basics, gpu, logger #%% -reference_height = 1000 # boundary layer height in m - -length = np.array([6,4,1])*reference_height -viscosity = 1.56e-5 -mach = 0.1 -nodes_per_height = 32 - -z_0 = 0.1 -u_star = 0.4 -kappa = 0.4 - -velocity = 0.5*u_star/kappa*np.log(length[2]/z_0+1) -flow_through_time = length[0]/velocity -use_AMD = True - - -sim_name = "BoundaryLayer" -config_file = Path(__file__).parent/Path("config.txt") +sim_name = "ABL" +config_file = Path(__file__).parent/"configBoundaryLayer.txt" output_path = Path(__file__).parent/Path("output") output_path.mkdir(exist_ok=True) -t_out = 1000. -t_end = 5000. 
-t_start_averaging = 0 -t_start_tmp_averaging = 100_000 -t_averaging = 200 -t_start_out_probe = 0 -t_out_probe = 1000 #%% logger.Logger.initialize_logger() @@ -39,95 +16,161 @@ basics.logger.Logger.add_stdout() basics.logger.Logger.set_debug_level(basics.logger.Level.INFO_LOW) basics.logger.Logger.time_stamp(basics.logger.TimeStamp.ENABLE) basics.logger.Logger.enable_printed_rank_numbers(True) -# %% -comm = gpu.Communicator.get_instance() #%% grid_factory = gpu.grid_generator.GridFactory.make() grid_builder = gpu.grid_generator.MultipleGridBuilder.make_shared(grid_factory) +communicator = gpu.Communicator.get_instance() + +config = basics.ConfigurationFile() +config.load(str(config_file)) + +para = gpu.Parameter(communicator.get_number_of_process(), communicator.get_pid(), config) +bc_factory = gpu.BoundaryConditionFactory() #%% -dx = reference_height/nodes_per_height -dt = dx * mach / (np.sqrt(3) * velocity) -velocity_lb = velocity * dt / dx # LB units -viscosity_lb = viscosity * dt / (dx * dx) # LB units +boundary_layer_height = config.get_float_value("boundaryLayerHeight", 1000) +z0 = config.get_float_value("z0", 0.1) +u_star = config.get_float_value("u_star", 0.4) -pressure_gradient = u_star**2 / reference_height -pressure_gradient_lb = pressure_gradient * dt**2 / dx +kappa = config.get_float_value("vonKarmanConstant", 0.4) # von Karman constant -logger.vf_log_info(f"velocity = {velocity_lb:1.6} dx/dt") -logger.vf_log_info(f"dt = {dt:1.6}") -logger.vf_log_info(f"dx = {dx:1.6}") -logger.vf_log_info(f"u* = {u_star:1.6}") -logger.vf_log_info(f"dpdx = {pressure_gradient:1.6}") -logger.vf_log_info(f"dpdx = {pressure_gradient_lb:1.6} dx/dt^2") -logger.vf_log_info(f"viscosity = {viscosity_lb:1.6} dx^2/dt") +viscosity = config.get_float_value("viscosity", 1.56e-5) +velocity = 0.5*u_star/kappa*np.log(boundary_layer_height/z0+1) #0.5 times max mean velocity at the top in m/s -#%% -config = basics.ConfigurationFile() -config.load(str(config_file)) -#%% -para = 
gpu.Parameter(config, comm.get_number_of_process(), comm.get_pid()) +mach = config.get_float_value("Ma", 0.1) +nodes_per_height = config.get_uint_value("nz", 64) + + + +write_precursor = config.get_bool_value("_p", False) +read_precursor = config.get_bool_value("readPrecursor", False) + +if write_precursor: + nTWritePrecursor = config.get_int_value("nTimestepsWritePrecursor") + t_start_precursor = config.get_float_value("tStartPrecursor") + pos_x_precursor = config.get_float_value("posXPrecursor") +if read_precursor: + nTReadPrecursor = config.get_int_value("nTimestepsReadPrecursor") +if write_precursor or read_precursor: + use_distributions = config.get_bool_value("useDistributions", False) + precursor_directory = config.get_string_value("precursorDirectory") + +# all in s +t_start_out = config.get_float_value("tStartOut") +t_out = config.get_float_value("tOut") +t_end = config.get_float_value("tEnd") # total time of simulation + +t_start_averaging = config.get_float_value("tStartAveraging") +t_start_tmp_averaging = config.get_float_value("tStartTmpAveraging") +t_averaging = config.get_float_value("tAveraging") +t_start_out_probe = config.get_float_value("tStartOutProbe") +t_out_probe = config.get_float_value("tOutProbe") + +#%% +length = np.array([6,4,1])*boundary_layer_height +dx = boundary_layer_height/nodes_per_height +dt = dx * mach / (np.sqrt(3) * velocity) +velocity_LB = velocity * dt / dx # LB units +viscosity_LB = viscosity * dt / (dx * dx) # LB units +pressure_gradient = u_star * u_star / boundary_layer_height +pressure_gradient_LB = pressure_gradient * (dt*dt)/dx + +logger.vf_log_info(f"velocity [dx/dt] = {velocity_LB}") +logger.vf_log_info(f"dt = {dt}") +logger.vf_log_info(f"dx = {dx}") +logger.vf_log_info(f"viscosity [10^8 dx^2/dt] = {viscosity_LB*1e8}") +logger.vf_log_info(f"u* /(dx/dt) = {u_star*dt/dx}") +logger.vf_log_info(f"dpdx = {pressure_gradient}") +logger.vf_log_info(f"dpdx /(dx/dt^2) = {pressure_gradient_LB}") + +#%% #%% 
-para.set_devices([0]) para.set_output_prefix(sim_name) -para.set_output_path(str(output_path)) -para.set_f_name(para.get_output_path() + "/" + para.get_output_prefix()) para.set_print_files(True) -para.set_max_level(1) -#%% -para.set_velocity(velocity_lb) -para.set_viscosity(viscosity_lb) + +para.set_forcing(pressure_gradient_LB, 0, 0) +para.set_velocity_LB(velocity_LB) +para.set_viscosity_LB(viscosity_LB) para.set_velocity_ratio(dx/dt) para.set_viscosity_ratio(dx*dx/dt) -para.set_use_AMD(use_AMD) +para.set_density_ratio(1.0) -para.set_main_kernel("TurbulentViscosityCumulantK17CompChim" if para.get_use_AMD() else "CummulantK17CompChim") +para.set_main_kernel("TurbulentViscosityCumulantK17CompChim") -para.set_SGS_constant(0.083) +para.set_timestep_start_out(int(t_start_out/dt)) +para.set_timestep_out(int(t_out/dt)) +para.set_timestep_end(int(t_end/dt)) +para.set_is_body_force(config.get_bool_value("bodyForce")) +#%% +tm_factory = gpu.TurbulenceModelFactory(para) +tm_factory.read_config_file(config) +#%% +grid_builder.add_coarse_grid(0.0, 0.0, 0.0, *length, dx) +grid_builder.set_periodic_boundary_condition(not read_precursor, True, False) +grid_builder.build_grids(basics.LbmOrGks.LBM, False) +sampling_offset = 2 +if read_precursor: + precursor = gpu.create_file_collection(precursor_directory + "/precursor", gpu.FileType.VTK) + grid_builder.set_precursor_boundary_condition(gpu.SideType.MX, precursor, nTReadPrecursor, 0, 0, 0) + +grid_builder.set_stress_boundary_condition(gpu.SideType.MZ, 0, 0, 1, sampling_offset, z0/dx) +para.set_has_wall_monitor(True) +grid_builder.set_slip_boundary_condition(gpu.SideType.PZ, 0, 0, -1) + +if read_precursor: + grid_builder.set_pressure_boundary_condition(gpu.SideType.PX, 0) +bc_factory.set_stress_boundary_condition(gpu.StressBC.StressPressureBounceBack) +bc_factory.set_slip_boundary_condition(gpu.SlipBC.SlipBounceBack) +bc_factory.set_pressure_boundary_condition(gpu.PressureBC.OutflowNonReflective) 
+bc_factory.set_precursor_boundary_condition(gpu.PrecursorBC.DistributionsPrecursor if use_distributions else gpu.PrecursorBC.VelocityPrecursor) +para.set_outflow_pressure_correction_factor(0.0); +#%% def init_func(coord_x, coord_y, coord_z): return [ 0.0, - (u_star/kappa*np.log(max(coord_z/z_0,0)+1) + 2*np.sin(np.pi*16*coord_x/length[0])*np.sin(np.pi*8*coord_z/length[2]))/((coord_z/reference_height)**2+0.1)*dt/dx, - 2*np.sin(np.pi*16*coord_x/length[0])*np.sin(np.pi*8*coord_z/length[2])/((coord_z/reference_height)**2+0.1)*dt/dx, - 8*u_star/kappa*(np.sin(np.pi*8*coord_y/reference_height)*np.sin(np.pi*8*coord_z/reference_height)+np.sin(np.pi*8*coord_x/length[0]))/((length[2]/2-coord_z)**2+0.1)*dt/dx - ] - + (u_star/0.4 * np.log(np.maximum(coord_z,z0)/z0) + 2.0*np.sin(np.pi*16*coord_x/length[0])*np.sin(np.pi*8*coord_z/boundary_layer_height)/(np.square(coord_z/boundary_layer_height)+1)) * dt / dx, + 2.0*np.sin(np.pi*16.*coord_x/length[0])*np.sin(np.pi*8.*coord_z/boundary_layer_height)/(np.square(coord_z/boundary_layer_height)+1.) * dt / dx, + 8.0*u_star/0.4*(np.sin(np.pi*8.0*coord_y/boundary_layer_height)*np.sin(np.pi*8.0*coord_z/boundary_layer_height)+np.sin(np.pi*8.0*coord_x/length[0]))/(np.square(length[2]/2.0-coord_z)+1.) 
* dt / dx] para.set_initial_condition(init_func) -para.set_t_out(int(t_out/dt)) -para.set_t_end(int(t_end/dt)) -para.set_is_body_force(True) -para.set_has_wall_model_monitor(True) - -grid_builder.add_coarse_grid(0.0, 0.0, 0.0, *length, dx) -grid_builder.set_periodic_boundary_condition(True, True, False) -grid_builder.build_grids(basics.LbmOrGks.LBM, False) #%% -sampling_offset = 2 -grid_builder.set_stress_boundary_condition(gpu.SideType.MZ, 0.0, 0.0, 1.0, sampling_offset, z_0/dx) -grid_builder.set_slip_boundary_condition(gpu.SideType.PZ, 0.0, 0.0, 0.0) +planar_average_probe = gpu.probes.PlanarAverageProbe("horizontalPlanes", para.get_output_path(), 0, int(t_start_tmp_averaging/dt), int(t_averaging/dt) , int(t_start_out_probe/dt), int(t_out_probe/dt), 'z') +planar_average_probe.add_all_available_statistics() +planar_average_probe.set_file_name_to_n_out() +para.add_probe(planar_average_probe) +#%% +wall_model_probe = gpu.probes.WallModelProbe("wallModelProbe", para.get_output_path(), 0, int(t_start_tmp_averaging/dt), int(t_averaging/dt/4), int(t_start_out_probe/dt), int(t_out_probe/dt)) +wall_model_probe.add_all_available_statistics() +wall_model_probe.set_file_name_to_n_out() +wall_model_probe.set_force_output_to_stress(True) +if para.get_is_body_force(): + wall_model_probe.set_evaluate_pressure_gradient(True) +para.add_probe(wall_model_probe) + +plane_locs = [100,] +if read_precursor: plane_locs.extend([1000, 1500, 2000, 2500, 0]) + +for n_probe, probe_pos in enumerate(plane_locs): + plane_probe = gpu.probes.PlaneProbe(f"planeProbe_{n_probe+1}", para.get_output_path(), int(t_start_averaging/dt), 10, int(t_start_out_probe/dt), int(t_out_probe/dt)) + plane_probe.set_probe_plane(probe_pos, 0, 0, dx, length[1], length[2]) + plane_probe.add_all_available_statistics() + para.add_probe(plane_probe) + +if write_precursor: + precursor_writer = gpu.PrecursorWriter("precursor", para.get_output_path() + precursor_directory, pos_x_precursor, 0,length[1], 0, length[2], 
t_start_precursor/dt, nTWritePrecursor, gpu.OutputVariable.Distributions if use_distributions else gpu.OutputVariable.Velocities) + para.add_probe(precursor_writer) #%% cuda_memory_manager = gpu.CudaMemoryManager(para) -grid_generator = gpu.GridProvider.make_grid_generator(grid_builder, para, cuda_memory_manager, comm) - +grid_generator = gpu.GridProvider.make_grid_generator(grid_builder, para, cuda_memory_manager, communicator) #%% -wall_probe = gpu.probes.WallModelProbe("wallModelProbe", str(output_path), int(t_start_averaging/dt), int(t_start_tmp_averaging/dt), int(t_averaging/dt/4), int(t_start_out_probe/dt), int(t_out_probe/dt)) -wall_probe.add_all_available_statistics() -wall_probe.set_file_name_to_n_out() -wall_probe.set_force_output_to_stress(True) -if para.get_is_body_force(): - wall_probe.set_evaluate_pressure_gradient(True) -planar_probe = gpu.probes.PlanarAverageProbe("planarAverageProbe", str(output_path), int(t_start_averaging/dt), int(t_start_tmp_averaging/dt), int(t_averaging/dt), int(t_start_out_probe/dt), int(t_out_probe/dt), "z") -para.add_probe(wall_probe) - #%% -sim = gpu.Simulation(para, cuda_memory_manager, comm, grid_generator) +sim = gpu.Simulation(para, cuda_memory_manager, communicator, grid_generator, bc_factory, tm_factory) #%% sim.run() MPI.Finalize() \ No newline at end of file diff --git a/Python/boundary_layer/config.txt b/Python/boundary_layer/config.txt deleted file mode 100644 index e4c778c4cc048f54c0a32310e6bf4a7343a263fa..0000000000000000000000000000000000000000 --- a/Python/boundary_layer/config.txt +++ /dev/null @@ -1,2 +0,0 @@ -Path = . -GridPath = . 
diff --git a/Python/boundary_layer/configBoundaryLayer.txt b/Python/boundary_layer/configBoundaryLayer.txt new file mode 100644 index 0000000000000000000000000000000000000000..83e7861a5fb85ea800d187699f1c6c1409422f0a --- /dev/null +++ b/Python/boundary_layer/configBoundaryLayer.txt @@ -0,0 +1,42 @@ +################################################## +#informations for Writing +################################################## +Path = . +################################################## +#informations for reading +################################################## +GridPath = . +################################################## +Devices = 0 +################################################## +tStartOut = 0 +tOut = 100000 +tEnd = 300000 +################################################## +tStartAveraging = 0 +tStartTmpAveraging = 100000 +tAveraging = 200 +tStartOutProbe = 0 +tOutProbe = 1000 +################################################## +Ma = 0.1 +nz = 96 + +bodyForce = true +UseAMD = true +SGSconstant = 0.2 +QuadricLimiterP = 100000.0 +QuadricLimiterM = 100000.0 +QuadricLimiterD = 100000.0 + +################################################## +readPrecursor = false +nTimestepsReadPrecursor = 10 +precursorFile = precursor/Precursor + +################################################## +writePrecursor = false +nTimestepsWritePrecursor = 10 + +tStartPrecursor = 100 +posXPrecursor = 3000 \ No newline at end of file diff --git a/apps/gpu/LBM/ActuatorLine/ActuatorLine.cpp b/apps/gpu/LBM/ActuatorLine/ActuatorLine.cpp index baa8424c8b1ae6f04c41ec0de52f8c1fd0a9e1d6..a66f0da22edc6268c39d7856307ad5ad91658414 100644 --- a/apps/gpu/LBM/ActuatorLine/ActuatorLine.cpp +++ b/apps/gpu/LBM/ActuatorLine/ActuatorLine.cpp @@ -28,12 +28,14 @@ #include "GridGenerator/grid/GridBuilder/LevelGridBuilder.h" #include "GridGenerator/grid/GridBuilder/MultipleGridBuilder.h" #include "GridGenerator/grid/BoundaryConditions/Side.h" +#include 
"GridGenerator/grid/BoundaryConditions/BoundaryCondition.h" + #include "GridGenerator/grid/GridFactory.h" #include "GridGenerator/io/SimulationFileWriter/SimulationFileWriter.h" #include "GridGenerator/io/GridVTKWriter/GridVTKWriter.h" -#include "GridGenerator/io/STLReaderWriter/STLReader.h" -#include "GridGenerator/io/STLReaderWriter/STLWriter.h" +#include "GridGenerator/VelocitySetter/VelocitySetter.h" + ////////////////////////////////////////////////////////////////////////// @@ -49,6 +51,7 @@ #include "VirtualFluids_GPU/PreCollisionInteractor/Probes/PointProbe.h" #include "VirtualFluids_GPU/PreCollisionInteractor/Probes/PlaneProbe.h" #include "VirtualFluids_GPU/Factories/BoundaryConditionFactory.h" +#include "VirtualFluids_GPU/TurbulenceModels/TurbulenceModelFactory.h" #include "VirtualFluids_GPU/GPU/CudaMemoryManager.h" @@ -99,7 +102,6 @@ void multipleLevel(const std::string& configPath) vf::gpu::Communicator& communicator = vf::gpu::Communicator::getInstance(); auto gridFactory = GridFactory::make(); - gridFactory->setTriangularMeshDiscretizationMethod(TriangularMeshDiscretizationMethod::POINT_IN_OBJECT); auto gridBuilder = MultipleGridBuilder::makeShared(gridFactory); //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// @@ -173,7 +175,10 @@ void multipleLevel(const std::string& configPath) gridBuilder->setPressureBoundaryCondition(SideType::PX, 0.0); bcFactory.setVelocityBoundaryCondition(BoundaryConditionFactory::VelocityBC::VelocityAndPressureCompressible); - bcFactory.setPressureBoundaryCondition(BoundaryConditionFactory::PressureBC::PressureNonEquilibriumCompressible); + bcFactory.setPressureBoundaryCondition(BoundaryConditionFactory::PressureBC::OutflowNonReflectivePressureCorrection); + + SPtr<TurbulenceModelFactory> tmFactory = std::make_shared<TurbulenceModelFactory>(para); + tmFactory->readConfigFile(config); 
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// @@ -192,28 +197,31 @@ void multipleLevel(const std::string& configPath) actuator_farm->addTurbine(turbPos[0], turbPos[1], turbPos[2], reference_diameter, omega, 0, 0, bladeRadii); para->addActuator( actuator_farm ); - // SPtr<PointProbe> pointProbe = SPtr<PointProbe>( new PointProbe("pointProbe", para->getOutputPath(), 100, 1, 500, 100) ); - // std::vector<real> probeCoordsX = {reference_diameter,2*reference_diameter,5*reference_diameter}; - // std::vector<real> probeCoordsY = {3*reference_diameter,3*reference_diameter,3*reference_diameter}; - // std::vector<real> probeCoordsZ = {3*reference_diameter,3*reference_diameter,3*reference_diameter}; - // pointProbe->addProbePointsFromList(probeCoordsX, probeCoordsY, probeCoordsZ); - // // pointProbe->addProbePointsFromXNormalPlane(2*D, 0.0, 0.0, L_y, L_z, (uint)L_y/dx, (uint)L_z/dx); + SPtr<ActuatorLine> actuator_line = std::make_shared<ActuatorLine>(nBlades, density, nBladeNodes, epsilon, turbPos[0], turbPos[1], turbPos[2], reference_diameter, level, dt, dx, true); + para->addActuator( actuator_line ); + + SPtr<PointProbe> pointProbe = std::make_shared<PointProbe>("pointProbe", para->getOutputPath(), 100, 1, 500, 100); + std::vector<real> probeCoordsX = {reference_diameter,2*reference_diameter,5*reference_diameter}; + std::vector<real> probeCoordsY = {3*reference_diameter,3*reference_diameter,3*reference_diameter}; + std::vector<real> probeCoordsZ = {3*reference_diameter,3*reference_diameter,3*reference_diameter}; + pointProbe->addProbePointsFromList(probeCoordsX, probeCoordsY, probeCoordsZ); + // pointProbe->addProbePointsFromXNormalPlane(2*D, 0.0, 0.0, L_y, L_z, (uint)L_y/dx, (uint)L_z/dx); - // pointProbe->addStatistic(Statistic::Means); - // pointProbe->addStatistic(Statistic::Variances); - // para->addProbe( pointProbe ); + pointProbe->addStatistic(Statistic::Means); + 
pointProbe->addStatistic(Statistic::Variances); + para->addProbe( pointProbe ); - // SPtr<PlaneProbe> planeProbe = SPtr<PlaneProbe>( new PlaneProbe("planeProbe", para->getOutputPath(), 100, 500, 100, 100) ); - // planeProbe->setProbePlane(5*reference_diameter, 0, 0, dx, L_y, L_z); - // planeProbe->addStatistic(Statistic::Means); - // para->addProbe( planeProbe ); + SPtr<PlaneProbe> planeProbe = std::make_shared<PlaneProbe>("planeProbe", para->getOutputPath(), 100, 500, 100, 100); + planeProbe->setProbePlane(5*reference_diameter, 0, 0, dx, L_y, L_z); + planeProbe->addStatistic(Statistic::Means); + para->addProbe( planeProbe ); auto cudaMemoryManager = std::make_shared<CudaMemoryManager>(para); auto gridGenerator = GridProvider::makeGridGenerator(gridBuilder, para, cudaMemoryManager, communicator); - Simulation sim(para, cudaMemoryManager, communicator, *gridGenerator, &bcFactory); + Simulation sim(para, cudaMemoryManager, communicator, *gridGenerator, &bcFactory, tmFactory); sim.run(); } diff --git a/apps/gpu/LBM/BoundaryLayer/BoundaryLayer.cpp b/apps/gpu/LBM/BoundaryLayer/BoundaryLayer.cpp index 991025b649d69305c030fe2f1dd1763a2137af9b..1cd1ba068f6ee184f2550d13bf8b4896e5b9ff63 100644 --- a/apps/gpu/LBM/BoundaryLayer/BoundaryLayer.cpp +++ b/apps/gpu/LBM/BoundaryLayer/BoundaryLayer.cpp @@ -19,6 +19,7 @@ #include "Core/VectorTypes.h" #include <basics/config/ConfigurationFile.h> +#include "lbm/constants/NumericConstants.h" #include <logger/Logger.h> @@ -28,12 +29,13 @@ #include "GridGenerator/grid/GridBuilder/LevelGridBuilder.h" #include "GridGenerator/grid/GridBuilder/MultipleGridBuilder.h" #include "GridGenerator/grid/BoundaryConditions/Side.h" +#include "GridGenerator/grid/BoundaryConditions/BoundaryCondition.h" + #include "GridGenerator/grid/GridFactory.h" #include "GridGenerator/io/SimulationFileWriter/SimulationFileWriter.h" #include "GridGenerator/io/GridVTKWriter/GridVTKWriter.h" -#include "GridGenerator/io/STLReaderWriter/STLReader.h" -#include 
"GridGenerator/io/STLReaderWriter/STLWriter.h" +#include "GridGenerator/VelocitySetter/VelocitySetter.h" ////////////////////////////////////////////////////////////////////////// @@ -44,11 +46,11 @@ #include "VirtualFluids_GPU/DataStructureInitializer/GridReaderFiles/GridReader.h" #include "VirtualFluids_GPU/Parameter/Parameter.h" #include "VirtualFluids_GPU/Output/FileWriter.h" -#include "VirtualFluids_GPU/PreCollisionInteractor/ActuatorLine.h" #include "VirtualFluids_GPU/PreCollisionInteractor/Probes/PointProbe.h" #include "VirtualFluids_GPU/PreCollisionInteractor/Probes/PlaneProbe.h" #include "VirtualFluids_GPU/PreCollisionInteractor/Probes/PlanarAverageProbe.h" #include "VirtualFluids_GPU/PreCollisionInteractor/Probes/WallModelProbe.h" +#include "VirtualFluids_GPU/PreCollisionInteractor/PrecursorWriter.h" #include "VirtualFluids_GPU/Factories/BoundaryConditionFactory.h" #include "VirtualFluids_GPU/TurbulenceModels/TurbulenceModelFactory.h" @@ -60,8 +62,9 @@ std::string path("."); -std::string simulationName("BoundayLayer"); +std::string simulationName("BoundaryLayer"); +using namespace vf::lbm::constant; //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// @@ -100,24 +103,48 @@ void multipleLevel(const std::string& configPath) LbmOrGks lbmOrGks = LBM; - const real H = 1000.0; // boundary layer height in m + const real H = config.getValue("boundaryLayerHeight", 1000.0); // boundary layer height in m const real L_x = 6*H; const real L_y = 4*H; - const real L_z = 1*H; + const real L_z = H; + + const real z0 = config.getValue("z0", 0.1f); // roughness length in m + const real 
u_star = config.getValue("u_star", 0.4f); //friction velocity in m/s + const real kappa = config.getValue("vonKarmanConstant", 0.4f); // von Karman constant - const real z0 = 0.1; // roughness length in m - const real u_star = 0.4; //friction velocity in m/s - const real kappa = 0.4; // von Karman constant + const real viscosity = config.getValue("viscosity", 1.56e-5f); - const real viscosity = 1.56e-5; + const real velocity = 0.5f*u_star/kappa*log(H/z0+1.f); //0.5 times max mean velocity at the top in m/s - const real velocity = 0.5*u_star/kappa*log(L_z/z0); //0.5 times max mean velocity at the top in m/s + const real mach = config.getValue<real>("Ma", 0.1); - const real mach = config.contains("Ma")? config.getValue<real>("Ma"): 0.1; + const uint nodes_per_H = config.getValue<uint>("nz", 64); - const uint nodes_per_H = config.contains("nz")? config.getValue<uint>("nz"): 64; + const bool writePrecursor = config.getValue("writePrecursor", false); + bool useDistributions = false; // must have a defined value: it is read unconditionally when the precursor BC type is selected, even if neither precursor branch below runs + std::string precursorDirectory; + int nTWritePrecursor; real tStartPrecursor, posXPrecursor; + if(writePrecursor) + { + nTWritePrecursor = config.getValue<int>("nTimestepsWritePrecursor"); + tStartPrecursor = config.getValue<real>("tStartPrecursor"); + posXPrecursor = config.getValue<real>("posXPrecursor"); + useDistributions = config.getValue<bool>("useDistributions", false); + precursorDirectory = config.getValue<std::string>("precursorDirectory"); + + } + + const bool readPrecursor = config.getValue("readPrecursor", false); + int nTReadPrecursor; + if(readPrecursor) + { + nTReadPrecursor = config.getValue<int>("nTimestepsReadPrecursor"); + precursorDirectory = config.getValue<std::string>("precursorDirectory"); + useDistributions = config.getValue<bool>("useDistributions", false); + + } // all in s const float tStartOut = config.getValue<real>("tStartOut"); const float tOut = config.getValue<real>("tOut"); @@ -130,7 +157,7 @@ void multipleLevel(const std::string& configPath) const float 
tOutProbe = config.getValue<real>("tOutProbe"); - const real dx = L_z/real(nodes_per_H); + const real dx = H/real(nodes_per_H); const real dt = dx * mach / (sqrt(3) * velocity); @@ -172,11 +199,8 @@ void multipleLevel(const std::string& configPath) //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - SPtr<TurbulenceModelFactory> tmFactory = SPtr<TurbulenceModelFactory>( new TurbulenceModelFactory(para) ); + SPtr<TurbulenceModelFactory> tmFactory = std::make_shared<TurbulenceModelFactory>(para); tmFactory->readConfigFile( config ); - - // tmFactory->setTurbulenceModel(TurbulenceModel::AMD); - // tmFactory->setModelConstant(config.getValue<real>("SGSconstant")); ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// @@ -187,38 +211,64 @@ void multipleLevel(const std::string& configPath) // gridBuilder->addGrid( new Cuboid( 0.0, 0.0, 0.0, L_x, L_y, 0.3*L_z) , 1 ); // para->setMaxLevel(2); - gridBuilder->setPeriodicBoundaryCondition(true, true, false); + gridBuilder->setPeriodicBoundaryCondition(!readPrecursor, true, false); gridBuilder->buildGrids(lbmOrGks, false); // buildGrids() has to be called before setting the BCs!!!! 
uint samplingOffset = 2; - // gridBuilder->setVelocityBoundaryCondition(SideType::MZ, 0.0, 0.0, 0.0); - gridBuilder->setStressBoundaryCondition(SideType::MZ, + + if(readPrecursor) + { + auto precursor = createFileCollection(precursorDirectory + "/precursor", FileType::VTK); + gridBuilder->setPrecursorBoundaryCondition(SideType::MX, precursor, nTReadPrecursor); + + gridBuilder->setStressBoundaryCondition(SideType::MZ, 0.0, 0.0, 1.0, // wall normals samplingOffset, z0/dx); // wall model settinng - para->setHasWallModelMonitor(true); - bcFactory.setStressBoundaryCondition(BoundaryConditionFactory::StressBC::StressPressureBounceBack); + para->setHasWallModelMonitor(true); + + gridBuilder->setSlipBoundaryCondition(SideType::PZ, 0.0f, 0.0f, -1.0f); + + + gridBuilder->setPressureBoundaryCondition(SideType::PX, 0.f); + } + else + { + gridBuilder->setSlipBoundaryCondition(SideType::PZ, 0.0, 0.0, -1.0); - gridBuilder->setSlipBoundaryCondition(SideType::PZ, 0.0, 0.0, 0.0); + gridBuilder->setStressBoundaryCondition(SideType::MZ, + 0.0, 0.0, 1.0, // wall normals + samplingOffset, z0/dx); // wall model settinng + para->setHasWallModelMonitor(true); + } + + + + bcFactory.setStressBoundaryCondition(BoundaryConditionFactory::StressBC::StressPressureBounceBack); bcFactory.setSlipBoundaryCondition(BoundaryConditionFactory::SlipBC::SlipBounceBack); - + bcFactory.setPressureBoundaryCondition(BoundaryConditionFactory::PressureBC::OutflowNonReflective); + bcFactory.setPrecursorBoundaryCondition(useDistributions ? 
BoundaryConditionFactory::PrecursorBC::DistributionsPrecursor : BoundaryConditionFactory::PrecursorBC::VelocityPrecursor); + para->setOutflowPressureCorrectionFactor(0.0); + + - real cPi = 3.1415926535897932384626433832795; para->setInitialCondition([&](real coordX, real coordY, real coordZ, real &rho, real &vx, real &vy, real &vz) { rho = (real)0.0; - vx = (u_star/0.4 * log(coordZ/z0) + 2.0*sin(cPi*16.0f*coordX/L_x)*sin(cPi*8.0f*coordZ/H)/(pow(coordZ/H,c2o1)+c1o1)) * dt / dx; - vy = 2.0*sin(cPi*16.0f*coordX/L_x)*sin(cPi*8.0f*coordZ/H)/(pow(coordZ/H,c2o1)+c1o1) * dt / dx; - vz = 8.0*u_star/0.4*(sin(cPi*8.0*coordY/H)*sin(cPi*8.0*coordZ/H)+sin(cPi*8.0*coordX/L_x))/(pow(L_z/2.0-coordZ, c2o1)+c1o1) * dt / dx; + vx = rho = c0o1; + vx = (u_star/c4o10 * log(coordZ/z0+c1o1) + c2o1*sin(cPi*c16o1*coordX/L_x)*sin(cPi*c8o1*coordZ/H)/(pow(coordZ/H,c2o1)+c1o1)) * dt/dx; + vy = c2o1*sin(cPi*c16o1*coordX/L_x)*sin(cPi*c8o1*coordZ/H)/(pow(coordZ/H,c2o1)+c1o1) * dt/dx; + vz = c8o1*u_star/c4o10*(sin(cPi*c8o1*coordY/H)*sin(cPi*c8o1*coordZ/H)+sin(cPi*c8o1*coordX/L_x))/(pow(c1o2*L_z-coordZ, c2o1)+c1o1) * dt/dx; }); + + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - SPtr<PlanarAverageProbe> planarAverageProbe = SPtr<PlanarAverageProbe>( new PlanarAverageProbe("planeProbe", para->getOutputPath(), tStartAveraging/dt, tStartTmpAveraging/dt, tAveraging/dt , tStartOutProbe/dt, tOutProbe/dt, 'z') ); + SPtr<PlanarAverageProbe> planarAverageProbe = SPtr<PlanarAverageProbe>( new PlanarAverageProbe("horizontalPlanes", para->getOutputPath(), 0, tStartTmpAveraging/dt, tAveraging/dt , tStartOutProbe/dt, tOutProbe/dt, 'z') ); planarAverageProbe->addAllAvailableStatistics(); planarAverageProbe->setFileNameToNOut(); para->addProbe( planarAverageProbe ); - para->setHasWallModelMonitor(true); - SPtr<WallModelProbe> wallModelProbe = SPtr<WallModelProbe>( new WallModelProbe("wallModelProbe", 
para->getOutputPath(), tStartAveraging/dt, tStartTmpAveraging/dt, tAveraging/dt/4.0 , tStartOutProbe/dt, tOutProbe/dt) ); + SPtr<WallModelProbe> wallModelProbe = SPtr<WallModelProbe>( new WallModelProbe("wallModelProbe", para->getOutputPath(), 0, tStartTmpAveraging/dt, tAveraging/dt/4.0 , tStartOutProbe/dt, tOutProbe/dt) ); wallModelProbe->addAllAvailableStatistics(); wallModelProbe->setFileNameToNOut(); wallModelProbe->setForceOutputToStress(true); @@ -226,6 +276,46 @@ void multipleLevel(const std::string& configPath) wallModelProbe->setEvaluatePressureGradient(true); para->addProbe( wallModelProbe ); + SPtr<PlaneProbe> planeProbe1 = SPtr<PlaneProbe>( new PlaneProbe("planeProbe_1", para->getOutputPath(), tStartAveraging/dt, 10, tStartOutProbe/dt, tOutProbe/dt) ); + planeProbe1->setProbePlane(100.0, 0.0, 0, dx, L_y, L_z); + planeProbe1->addAllAvailableStatistics(); + para->addProbe( planeProbe1 ); + + if(readPrecursor) + { + SPtr<PlaneProbe> planeProbe2 = SPtr<PlaneProbe>( new PlaneProbe("planeProbe_2", para->getOutputPath(), tStartAveraging/dt, 10, tStartOutProbe/dt, tOutProbe/dt) ); + planeProbe2->setProbePlane(1000.0, 0.0, 0, dx, L_y, L_z); + planeProbe2->addAllAvailableStatistics(); + para->addProbe( planeProbe2 ); + + SPtr<PlaneProbe> planeProbe3 = SPtr<PlaneProbe>( new PlaneProbe("planeProbe_3", para->getOutputPath(), tStartAveraging/dt, 10, tStartOutProbe/dt, tOutProbe/dt) ); + planeProbe3->setProbePlane(1500.0, 0.0, 0, dx, L_y, L_z); + planeProbe3->addAllAvailableStatistics(); + para->addProbe( planeProbe3 ); + + SPtr<PlaneProbe> planeProbe4 = SPtr<PlaneProbe>( new PlaneProbe("planeProbe_4", para->getOutputPath(), tStartAveraging/dt, 10, tStartOutProbe/dt, tOutProbe/dt) ); + planeProbe4->setProbePlane(2000.0, 0.0, 0, dx, L_y, L_z); + planeProbe4->addAllAvailableStatistics(); + para->addProbe( planeProbe4 ); + + SPtr<PlaneProbe> planeProbe5 = SPtr<PlaneProbe>( new PlaneProbe("planeProbe_5", para->getOutputPath(), tStartAveraging/dt, 10, tStartOutProbe/dt, 
tOutProbe/dt) ); + planeProbe5->setProbePlane(2500.0, 0.0, 0, dx, L_y, L_z); + planeProbe5->addAllAvailableStatistics(); + para->addProbe( planeProbe5 ); + + SPtr<PlaneProbe> planeProbe6 = SPtr<PlaneProbe>( new PlaneProbe("planeProbe_6", para->getOutputPath(), tStartAveraging/dt, 10, tStartOutProbe/dt, tOutProbe/dt) ); + planeProbe6->setProbePlane(0.0, L_y/2.0, 0, L_x, dx, L_z); + planeProbe6->addAllAvailableStatistics(); + para->addProbe( planeProbe6 ); + } + + + if(writePrecursor) + { + SPtr<PrecursorWriter> precursorWriter = std::make_shared<PrecursorWriter>("precursor", para->getOutputPath()+precursorDirectory, posXPrecursor, 0, L_y, 0, L_z, tStartPrecursor/dt, nTWritePrecursor, useDistributions? OutputVariable::Distributions: OutputVariable::Velocities); + para->addProbe(precursorWriter); + } + auto cudaMemoryManager = std::make_shared<CudaMemoryManager>(para); auto gridGenerator = GridProvider::makeGridGenerator(gridBuilder, para, cudaMemoryManager, communicator); diff --git a/apps/gpu/LBM/BoundaryLayer/configBoundaryLayer.txt b/apps/gpu/LBM/BoundaryLayer/configBoundaryLayer.txt index a489f0ab89738a193b16fee41c212a5943f6525d..83e7861a5fb85ea800d187699f1c6c1409422f0a 100644 --- a/apps/gpu/LBM/BoundaryLayer/configBoundaryLayer.txt +++ b/apps/gpu/LBM/BoundaryLayer/configBoundaryLayer.txt @@ -7,7 +7,7 @@ Path = . ################################################## GridPath = . 
################################################## -Devices = 1 +Devices = 0 ################################################## tStartOut = 0 tOut = 100000 @@ -28,3 +28,15 @@ SGSconstant = 0.2 QuadricLimiterP = 100000.0 QuadricLimiterM = 100000.0 QuadricLimiterD = 100000.0 + +################################################## +readPrecursor = false +nTimestepsReadPrecursor = 10 +precursorFile = precursor/Precursor + +################################################## +writePrecursor = false +nTimestepsWritePrecursor = 10 + +tStartPrecursor = 100 +posXPrecursor = 3000 \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index 8fcb7926102d188b44d8c74084235b6f175edf80..4353b019615408705c1896f632291c17f5720c07 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,2 +1,10 @@ [build-system] -requires = ["setuptools", "wheel", "scikit-build"] \ No newline at end of file +requires = [ + "wheel", + "cmake>=3.1.0", + "setuptools", + "setuptools_scm[toml]", + "cmake_build_extension" +] +build-backend = "setup_builder" +backend-path = ["utilities"] \ No newline at end of file diff --git a/pythonbindings/CMakeLists.txt b/pythonbindings/CMakeLists.txt index 5a84adef027fdfa2953e016693bb64570e48c1ef..f56b2e89ee89bdac76d2f98773d47948bc360aa2 100644 --- a/pythonbindings/CMakeLists.txt +++ b/pythonbindings/CMakeLists.txt @@ -1,24 +1,31 @@ project(VirtualFluidsPython LANGUAGES CUDA CXX) IF(BUILD_VF_GPU) - pybind11_add_module(pyfluids src/VirtualFluidsModulesGPU.cpp) + pybind11_add_module(python_bindings MODULE src/VirtualFluidsModulesGPU.cpp) + set_target_properties(python_bindings PROPERTIES + LIBRARY_OUTPUT_DIRECTORY ${CMAKE_SOURCE_DIR}/pythonbindings/pyfluids + OUTPUT_NAME "bindings") set_source_files_properties(src/VirtualFluidsModulesGPU.cpp PROPERTIES LANGUAGE CUDA) - target_link_libraries(pyfluids PRIVATE GridGenerator VirtualFluids_GPU basics lbmCuda logger) - target_include_directories(pyfluids PRIVATE ${VF_THIRD_DIR}/cuda_samples/) + 
target_link_libraries(python_bindings PRIVATE GridGenerator VirtualFluids_GPU basics lbmCuda logger) + target_include_directories(python_bindings PRIVATE ${VF_THIRD_DIR}/cuda_samples/) ENDIF() IF(BUILD_VF_CPU) - pybind11_add_module(pyfluids src/VirtualFluidsModulesCPU.cpp) + pybind11_add_module(python_bindings src/VirtualFluidsModulesCPU.cpp) + # The library file name must match PYBIND11_MODULE(bindings, m) in the CPU source, same as in the GPU branch + set_target_properties(python_bindings PROPERTIES + LIBRARY_OUTPUT_DIRECTORY ${CMAKE_SOURCE_DIR}/pythonbindings/pyfluids + OUTPUT_NAME "bindings") pybind11_add_module(pymuparser src/muParser.cpp) # TODO: Move this to MuParser CMakeLists.txt set_target_properties(muparser PROPERTIES POSITION_INDEPENDENT_CODE ON) - target_compile_definitions(pyfluids PRIVATE VF_METIS VF_MPI) + target_compile_definitions(python_bindings PRIVATE VF_METIS VF_MPI) target_compile_definitions(pymuparser PRIVATE VF_METIS VF_MPI) - target_link_libraries(pyfluids PRIVATE simulationconfig VirtualFluidsCore muparser basics) + target_link_libraries(python_bindings PRIVATE simulationconfig VirtualFluidsCore muparser basics) target_link_libraries(pymuparser PRIVATE muparser) ENDIF() -target_include_directories(pyfluids PRIVATE ${CMAKE_SOURCE_DIR}/src/) -target_include_directories(pyfluids PRIVATE ${CMAKE_BINARY_DIR}) \ No newline at end of file +target_include_directories(python_bindings PRIVATE ${CMAKE_SOURCE_DIR}/src/) +target_include_directories(python_bindings PRIVATE ${CMAKE_BINARY_DIR}) \ No newline at end of file diff --git a/pythonbindings/src/VirtualFluidsModulesCPU.cpp b/pythonbindings/src/VirtualFluidsModulesCPU.cpp index 2fba3da494f568f7d0d0a117a579a45c9c1b9245..9201a8ce9ab2f0e61b64ec0263185e5642feca18 100644 --- a/pythonbindings/src/VirtualFluidsModulesCPU.cpp +++ b/pythonbindings/src/VirtualFluidsModulesCPU.cpp @@ -5,7 +5,7 @@ namespace py_bindings { namespace py = pybind11; - PYBIND11_MODULE(pyfluids, m) + PYBIND11_MODULE(bindings, m) { cpu::makeModule(m); } diff --git a/pythonbindings/src/VirtualFluidsModulesGPU.cpp b/pythonbindings/src/VirtualFluidsModulesGPU.cpp index b96971caf381faada76ee676cf60469492d055c2..e0320115e1cf1fcb8c60d19af5a51f3fe92d7562 100644 --- 
a/pythonbindings/src/VirtualFluidsModulesGPU.cpp +++ b/pythonbindings/src/VirtualFluidsModulesGPU.cpp @@ -8,7 +8,7 @@ namespace py_bindings { namespace py = pybind11; - PYBIND11_MODULE(pyfluids, m) + PYBIND11_MODULE(bindings, m) { basics::makeModule(m); gpu::makeModule(m); diff --git a/pythonbindings/src/basics/submodules/configuration_file.cpp b/pythonbindings/src/basics/submodules/configuration_file.cpp index f5a2f87135a17f5eda34a7467d95f9db6b1c21d1..ad30864a41aa6038f3021bdd4d159ca7ee993ec5 100644 --- a/pythonbindings/src/basics/submodules/configuration_file.cpp +++ b/pythonbindings/src/basics/submodules/configuration_file.cpp @@ -1,5 +1,5 @@ #include <pybind11/pybind11.h> -#include <basics/config/ConfigurationFile.h> +#include "basics/config/ConfigurationFile.h" namespace configuration { @@ -9,6 +9,19 @@ namespace configuration { py::class_<vf::basics::ConfigurationFile>(parentModule, "ConfigurationFile") .def(py::init<>()) - .def("load", &vf::basics::ConfigurationFile::load); + .def("load", &vf::basics::ConfigurationFile::load) + .def("contains", &vf::basics::ConfigurationFile::contains) + .def("get_int_value" , static_cast<int (vf::basics::ConfigurationFile::*)(const std::string&) const>(&vf::basics::ConfigurationFile::getValue)) + .def("get_int_value" , static_cast<int (vf::basics::ConfigurationFile::*)(const std::string&, int ) const>(&vf::basics::ConfigurationFile::getValue)) + .def("get_uint_value" , static_cast<uint (vf::basics::ConfigurationFile::*)(const std::string&) const>(&vf::basics::ConfigurationFile::getValue)) + .def("get_uint_value" , static_cast<uint (vf::basics::ConfigurationFile::*)(const std::string&, uint ) const>(&vf::basics::ConfigurationFile::getValue)) + .def("get_float_value" , static_cast<float (vf::basics::ConfigurationFile::*)(const std::string&) const>(&vf::basics::ConfigurationFile::getValue)) + .def("get_float_value" , static_cast<float (vf::basics::ConfigurationFile::*)(const std::string&, float ) 
const>(&vf::basics::ConfigurationFile::getValue)) + .def("get_double_value", static_cast<double (vf::basics::ConfigurationFile::*)(const std::string&) const>(&vf::basics::ConfigurationFile::getValue)) + .def("get_double_value", static_cast<double (vf::basics::ConfigurationFile::*)(const std::string&, double ) const>(&vf::basics::ConfigurationFile::getValue)) + .def("get_bool_value" , static_cast<bool (vf::basics::ConfigurationFile::*)(const std::string&) const>(&vf::basics::ConfigurationFile::getValue)) + .def("get_bool_value" , static_cast<bool (vf::basics::ConfigurationFile::*)(const std::string&, bool ) const>(&vf::basics::ConfigurationFile::getValue)) + .def("get_string_value", static_cast<std::string (vf::basics::ConfigurationFile::*)(const std::string&) const>(&vf::basics::ConfigurationFile::getValue)) + .def("get_string_value", static_cast<std::string (vf::basics::ConfigurationFile::*)(const std::string&, std::string) const>(&vf::basics::ConfigurationFile::getValue)); } } \ No newline at end of file diff --git a/pythonbindings/src/gpu/gpu.cpp b/pythonbindings/src/gpu/gpu.cpp index c99b59d153e1afc4bad15b74192212a96e45718b..be236654782c5538f9e50f4ead3185c169d7b65c 100644 --- a/pythonbindings/src/gpu/gpu.cpp +++ b/pythonbindings/src/gpu/gpu.cpp @@ -6,9 +6,12 @@ #include "submodules/boundary_conditions.cpp" #include "submodules/communicator.cpp" #include "submodules/cuda_memory_manager.cpp" +#include "submodules/probes.cpp" +#include "submodules/precursor_writer.cpp" #include "submodules/grid_provider.cpp" #include "submodules/grid_generator.cpp" -#include "submodules/probes.cpp" +#include "submodules/turbulence_models.cpp" +#include "submodules/velocity_setter.cpp" namespace gpu { @@ -23,11 +26,14 @@ namespace gpu actuator_line::makeModule(gpuModule); actuator_farm::makeModule(gpuModule); boundary_conditions::makeModule(gpuModule); + velocity_setter::makeModule(gpuModule); communicator::makeModule(gpuModule); cuda_memory_manager::makeModule(gpuModule); - 
grid_provider::makeModule(gpuModule); probes::makeModule(gpuModule); + precursor_writer::makeModule(gpuModule); grid_generator::makeModule(gpuModule); + grid_provider::makeModule(gpuModule); + turbulence_model::makeModule(gpuModule); return gpuModule; } } \ No newline at end of file diff --git a/pythonbindings/src/gpu/submodules/actuator_line.cpp b/pythonbindings/src/gpu/submodules/actuator_line.cpp index 3207fadbc37df38e53e00adcb9a86f0b8e82ba98..c489654fd093881a068ebbd69294c4bd83847efb 100644 --- a/pythonbindings/src/gpu/submodules/actuator_line.cpp +++ b/pythonbindings/src/gpu/submodules/actuator_line.cpp @@ -1,8 +1,10 @@ #include <pybind11/pybind11.h> #include <pybind11/stl.h> #include <pybind11/numpy.h> -#include <gpu/VirtualFluids_GPU/PreCollisionInteractor/ActuatorLine.h> #include <gpu/VirtualFluids_GPU/PreCollisionInteractor/PreCollisionInteractor.h> +#include <gpu/VirtualFluids_GPU/PreCollisionInteractor/ActuatorLine.h> +#include <cstdint> + class PyActuatorLine : public ActuatorLine { public: @@ -12,12 +14,14 @@ public: PYBIND11_OVERRIDE_NAME(void, ActuatorLine, "calc_blade_forces", calcBladeForces,); } }; + namespace actuator_line { namespace py = pybind11; void makeModule(py::module_ &parentModule) { + using arr = py::array_t<float, py::array::c_style>; py::class_<ActuatorLine, PreCollisionInteractor, PyActuatorLine, std::shared_ptr<ActuatorLine>>(parentModule, "ActuatorLine", py::dynamic_attr()) @@ -29,7 +33,8 @@ namespace actuator_line const real, int, const real, - const real>(), + const real, + const bool>(), "n_blades", "density", "n_blade_nodes", @@ -38,7 +43,8 @@ namespace actuator_line "diameter", "level", "delta_t", - "delta_x") + "delta_x", + "use_host_arrays") .def_property("omega", &ActuatorLine::getOmega, &ActuatorLine::setOmega) .def_property("azimuth", &ActuatorLine::getAzimuth, &ActuatorLine::setAzimuth) .def_property("yaw", &ActuatorLine::getYaw, &ActuatorLine::setYaw) @@ -47,6 +53,8 @@ namespace actuator_line 
.def_property_readonly("n_nodes", &ActuatorLine::getNNodes) .def_property_readonly("n_indices", &ActuatorLine::getNIndices) .def_property_readonly("density", &ActuatorLine::getDensity) + .def_property_readonly("delta_t", &ActuatorLine::getDeltaT) + .def_property_readonly("delta_x", &ActuatorLine::getDeltaX) .def_property_readonly("position_x", &ActuatorLine::getPositionX) .def_property_readonly("position_y", &ActuatorLine::getPositionY) .def_property_readonly("position_z", &ActuatorLine::getPositionZ) @@ -61,12 +69,40 @@ namespace actuator_line .def("get_blade_forces_x", [](ActuatorLine& al){ return arr({al.getNBlades(), al.getNBladeNodes()}, al.getBladeForcesX()); } ) .def("get_blade_forces_y", [](ActuatorLine& al){ return arr({al.getNBlades(), al.getNBladeNodes()}, al.getBladeForcesY()); } ) .def("get_blade_forces_z", [](ActuatorLine& al){ return arr({al.getNBlades(), al.getNBladeNodes()}, al.getBladeForcesZ()); } ) - .def("set_blade_coords", [](ActuatorLine& al, arr coordsX, arr coordsY, arr coordsZ){ - al.setBladeCoords(static_cast<float *>(coordsX.request().ptr), static_cast<float *>(coordsY.request().ptr), static_cast<float *>(coordsZ.request().ptr)); } ) - .def("set_blade_velocities", [](ActuatorLine& al, arr velocitiesX, arr velocitiesY, arr velocitiesZ){ - al.setBladeVelocities(static_cast<float *>(velocitiesX.request().ptr), static_cast<float *>(velocitiesY.request().ptr), static_cast<float *>(velocitiesZ.request().ptr)); } ) - .def("set_blade_forces", [](ActuatorLine& al, arr forcesX, arr forcesY, arr forcesZ){ - al.setBladeForces(static_cast<float *>(forcesX.request().ptr), static_cast<float *>(forcesY.request().ptr), static_cast<float *>(forcesZ.request().ptr)); } ) + .def("get_blade_coords_x_device", [](ActuatorLine& al) -> intptr_t { return reinterpret_cast<intptr_t>(al.getBladeCoordsXD()); }, py::return_value_policy::reference) + .def("get_blade_coords_y_device", [](ActuatorLine& al) -> intptr_t { return 
reinterpret_cast<intptr_t>(al.getBladeCoordsYD()); }, py::return_value_policy::reference) + .def("get_blade_coords_z_device", [](ActuatorLine& al) -> intptr_t { return reinterpret_cast<intptr_t>(al.getBladeCoordsZD()); }, py::return_value_policy::reference) + .def("get_blade_velocities_x_device", [](ActuatorLine& al) -> intptr_t { return reinterpret_cast<intptr_t>(al.getBladeVelocitiesXD()); }, py::return_value_policy::reference) + .def("get_blade_velocities_y_device", [](ActuatorLine& al) -> intptr_t { return reinterpret_cast<intptr_t>(al.getBladeVelocitiesYD()); }, py::return_value_policy::reference) + .def("get_blade_velocities_z_device", [](ActuatorLine& al) -> intptr_t { return reinterpret_cast<intptr_t>(al.getBladeVelocitiesZD()); }, py::return_value_policy::reference) + .def("get_blade_forces_x_device", [](ActuatorLine& al)-> intptr_t { return reinterpret_cast<intptr_t>(al.getBladeForcesXD()); }, py::return_value_policy::reference ) + .def("get_blade_forces_y_device", [](ActuatorLine& al)-> intptr_t { return reinterpret_cast<intptr_t>(al.getBladeForcesYD()); }, py::return_value_policy::reference ) + .def("get_blade_forces_z_device", [](ActuatorLine& al)-> intptr_t { return reinterpret_cast<intptr_t>(al.getBladeForcesZD()); }, py::return_value_policy::reference ) + .def("set_preinit_blade_radii", [](ActuatorLine& al, arr radii){ al.setPreInitBladeRadii(static_cast<float *>(radii.request().ptr)); } ) + .def("set_blade_coords", [](ActuatorLine& al, arr coordsX, arr coordsY, arr coordsZ) + { + al.setBladeCoords(static_cast<float *>(coordsX.request().ptr), static_cast<float *>(coordsY.request().ptr), static_cast<float *>(coordsZ.request().ptr)); + }) + .def("set_blade_velocities", [](ActuatorLine& al, arr velocitiesX, arr velocitiesY, arr velocitiesZ) + { + al.setBladeVelocities(static_cast<float *>(velocitiesX.request().ptr), static_cast<float *>(velocitiesY.request().ptr), static_cast<float *>(velocitiesZ.request().ptr)); + }) + .def("set_blade_forces", 
[](ActuatorLine& al, arr forcesX, arr forcesY, arr forcesZ) + { + al.setBladeForces(static_cast<float *>(forcesX.request().ptr), static_cast<float *>(forcesY.request().ptr), static_cast<float *>(forcesZ.request().ptr)); + }) + // .def("set_blade_coords_device", [](ActuatorLine& al, arr coordsX, arr coordsY, arr coordsZ) + // { + // al.setBladeCoordsD(static_cast<float *>(coordsX.request().ptr), static_cast<float *>(coordsY.request().ptr), static_cast<float *>(coordsZ.request().ptr)); + // }) + // .def("set_blade_velocities_device", [](ActuatorLine& al, arr velocitiesX, arr velocitiesY, arr velocitiesZ) + // { + // al.setBladeVelocitiesD(static_cast<float *>(velocitiesX.request().ptr), static_cast<float *>(velocitiesY.request().ptr), static_cast<float *>(velocitiesZ.request().ptr)); + // }) + // .def("set_blade_forces_device", [](ActuatorLine& al, arr forcesX, arr forcesY, arr forcesZ) + // { + // al.setBladeForcesD(static_cast<float *>(forcesX.request().ptr), static_cast<float *>(forcesY.request().ptr), static_cast<float *>(forcesZ.request().ptr)); + // }) .def("calc_blade_forces", &ActuatorLine::calcBladeForces); } } \ No newline at end of file diff --git a/pythonbindings/src/gpu/submodules/boundary_conditions.cpp b/pythonbindings/src/gpu/submodules/boundary_conditions.cpp index 8f941a8705c225275d25291205ebdaeef8de5c9e..9ab758ffd6e9fc68e03eef40676508c093567df2 100644 --- a/pythonbindings/src/gpu/submodules/boundary_conditions.cpp +++ b/pythonbindings/src/gpu/submodules/boundary_conditions.cpp @@ -1,5 +1,6 @@ #include <pybind11/pybind11.h> #include <gpu/GridGenerator/grid/BoundaryConditions/Side.h> +#include "gpu/VirtualFluids_GPU/BoundaryConditions/BoundaryConditionFactory.h" namespace boundary_conditions { @@ -14,7 +15,59 @@ namespace boundary_conditions .value("PY", SideType::PY) .value("MZ", SideType::MZ) .value("PZ", SideType::PZ) - .value("GEOMETRY", SideType::GEOMETRY) - .export_values(); + .value("GEOMETRY", SideType::GEOMETRY); + + 
py::class_<BoundaryConditionFactory>(parentModule, "BoundaryConditionFactory") + .def(py::init<>()) + .def("set_velocity_boundary_condition", &BoundaryConditionFactory::setVelocityBoundaryCondition) + .def("set_no_slip_boundary_condition", &BoundaryConditionFactory::setNoSlipBoundaryCondition) + .def("set_slip_boundary_condition", &BoundaryConditionFactory::setSlipBoundaryCondition) + .def("set_pressure_boundary_condition", &BoundaryConditionFactory::setPressureBoundaryCondition) + .def("set_stress_boundary_condition", &BoundaryConditionFactory::setStressBoundaryCondition) + .def("set_precursor_boundary_condition", &BoundaryConditionFactory::setPrecursorBoundaryCondition) + .def("set_geometry_boundary_condition", &BoundaryConditionFactory::setGeometryBoundaryCondition); + + py::enum_<BoundaryConditionFactory::VelocityBC>(parentModule, "VelocityBC") + .value("VelocitySimpleBounceBackCompressible", BoundaryConditionFactory::VelocityBC::VelocitySimpleBounceBackCompressible) + .value("VelocityIncompressible", BoundaryConditionFactory::VelocityBC::VelocityIncompressible) + .value("VelocityCompressible", BoundaryConditionFactory::VelocityBC::VelocityCompressible) + .value("VelocityAndPressureCompressible", BoundaryConditionFactory::VelocityBC::VelocityAndPressureCompressible) + .value("NotSpecified", BoundaryConditionFactory::VelocityBC::NotSpecified); + + + py::enum_<BoundaryConditionFactory::NoSlipBC>(parentModule, "NoSlipBC") + .value("NoSlipImplicitBounceBack", BoundaryConditionFactory::NoSlipBC::NoSlipImplicitBounceBack) + .value("NoSlipBounceBack", BoundaryConditionFactory::NoSlipBC::NoSlipBounceBack) + .value("NoSlipIncompressible", BoundaryConditionFactory::NoSlipBC::NoSlipIncompressible) + .value("NoSlipCompressible", BoundaryConditionFactory::NoSlipBC::NoSlipCompressible) + .value("NoSlip3rdMomentsCompressible", BoundaryConditionFactory::NoSlipBC::NoSlip3rdMomentsCompressible); + + py::enum_<BoundaryConditionFactory::SlipBC>(parentModule, "SlipBC") + 
.value("SlipIncompressible", BoundaryConditionFactory::SlipBC::SlipIncompressible) + .value("SlipCompressible", BoundaryConditionFactory::SlipBC::SlipCompressible) + .value("SlipBounceBack", BoundaryConditionFactory::SlipBC::SlipBounceBack) + .value("SlipCompressibleTurbulentViscosity", BoundaryConditionFactory::SlipBC::SlipCompressibleTurbulentViscosity) + .value("SlipPressureCompressibleTurbulentViscosity", BoundaryConditionFactory::SlipBC::SlipPressureCompressibleTurbulentViscosity) + .value("NotSpecified", BoundaryConditionFactory::SlipBC::NotSpecified); + + py::enum_<BoundaryConditionFactory::PressureBC>(parentModule, "PressureBC") + .value("PressureEquilibrium", BoundaryConditionFactory::PressureBC::PressureEquilibrium) + .value("PressureEquilibrium2", BoundaryConditionFactory::PressureBC::PressureEquilibrium2) + .value("PressureNonEquilibriumIncompressible", BoundaryConditionFactory::PressureBC::PressureNonEquilibriumIncompressible) + .value("PressureNonEquilibriumCompressible", BoundaryConditionFactory::PressureBC::PressureNonEquilibriumCompressible) + .value("OutflowNonReflective", BoundaryConditionFactory::PressureBC::OutflowNonReflective) + .value("OutflowNonReflectivePressureCorrection", BoundaryConditionFactory::PressureBC::OutflowNonReflectivePressureCorrection) + .value("NotSpecified", BoundaryConditionFactory::PressureBC::NotSpecified); + + py::enum_<BoundaryConditionFactory::StressBC>(parentModule, "StressBC") + .value("StressCompressible", BoundaryConditionFactory::StressBC::StressCompressible) + .value("StressBounceBack", BoundaryConditionFactory::StressBC::StressBounceBack) + .value("StressPressureBounceBack", BoundaryConditionFactory::StressBC::StressPressureBounceBack) + .value("NotSpecified", BoundaryConditionFactory::StressBC::NotSpecified); + + py::enum_<BoundaryConditionFactory::PrecursorBC>(parentModule, "PrecursorBC") + .value("VelocityPrecursor", BoundaryConditionFactory::PrecursorBC::VelocityPrecursor) + 
.value("DistributionsPrecursor", BoundaryConditionFactory::PrecursorBC::DistributionsPrecursor) + .value("NotSpecified", BoundaryConditionFactory::PrecursorBC::NotSpecified); } } \ No newline at end of file diff --git a/pythonbindings/src/gpu/submodules/grid_generator.cpp b/pythonbindings/src/gpu/submodules/grid_generator.cpp index 579c06c4e00cae9646ced8b554d71631eeb7e793..a62247aa9603f544ffadbe12442597746f75374d 100644 --- a/pythonbindings/src/gpu/submodules/grid_generator.cpp +++ b/pythonbindings/src/gpu/submodules/grid_generator.cpp @@ -51,6 +51,7 @@ namespace grid_generator .def("set_pressure_boundary_condition", &LevelGridBuilder::setPressureBoundaryCondition) .def("set_periodic_boundary_condition", &LevelGridBuilder::setPeriodicBoundaryCondition) .def("set_no_slip_boundary_condition", &LevelGridBuilder::setNoSlipBoundaryCondition) + .def("set_precursor_boundary_condition", &LevelGridBuilder::setPrecursorBoundaryCondition) .def("set_stress_boundary_condition", &LevelGridBuilder::setStressBoundaryCondition); py::class_<MultipleGridBuilder, LevelGridBuilder, std::shared_ptr<MultipleGridBuilder>>(gridGeneratorModule, "MultipleGridBuilder") diff --git a/pythonbindings/src/gpu/submodules/grid_provider.cpp b/pythonbindings/src/gpu/submodules/grid_provider.cpp index 02ff273e2cd1a2022943e19c9a48a447d9dfe54b..bcfff5a15d3fa2306a3a1a3d1083a99f39e1d977 100644 --- a/pythonbindings/src/gpu/submodules/grid_provider.cpp +++ b/pythonbindings/src/gpu/submodules/grid_provider.cpp @@ -1,8 +1,5 @@ #include <pybind11/pybind11.h> #include "gpu/VirtualFluids_GPU/DataStructureInitializer/GridProvider.h" -// #include <gpu/VirtualFluids_GPU/GPU/CudaMemoryManager.h> -// #include <gpu/VirtualFluids_GPU/Parameter/Parameter.h> -// #include "gpu/GridGenerator/grid/GridBuilder/GridBuilder.h" namespace grid_provider { diff --git a/pythonbindings/src/gpu/submodules/parameter.cpp b/pythonbindings/src/gpu/submodules/parameter.cpp index 
7b4e67f101e3928abbd4262557864ea1d0f45b02..ba3e1b7cf94dee503deca96f32024509bc13c7d8 100644 --- a/pythonbindings/src/gpu/submodules/parameter.cpp +++ b/pythonbindings/src/gpu/submodules/parameter.cpp @@ -13,42 +13,43 @@ namespace parameter { py::class_<Parameter, std::shared_ptr<Parameter>>(parentModule, "Parameter") .def(py::init< - const vf::basics::ConfigurationFile&, int, - int - >(), - "config_data", + int, + std::optional<const vf::basics::ConfigurationFile*>>(), + "number_of_processes", + "my_ID", + "config_data") + .def(py::init<int, int>(), "number_of_processes", "my_ID") + .def(py::init<const vf::basics::ConfigurationFile*>(), "config_data") .def("set_forcing", &Parameter::setForcing) + .def("set_quadric_limiters", &Parameter::setQuadricLimiters) .def("set_diff_on", &Parameter::setDiffOn) .def("set_comp_on", &Parameter::setCompOn) .def("set_max_level", &Parameter::setMaxLevel) - .def("set_t_end", &Parameter::setTEnd) - .def("set_t_out", &Parameter::setTOut) - .def("set_t_start_out", &Parameter::setTStartOut) + .def("set_timestep_end", &Parameter::setTimestepEnd) + .def("set_timestep_out", &Parameter::setTimestepOut) + .def("set_timestep_start_out", &Parameter::setTimestepStartOut) .def("set_timestep_of_coarse_level", &Parameter::setTimestepOfCoarseLevel) + .def("set_calc_turbulence_intensity", &Parameter::setCalcTurbulenceIntensity) .def("set_output_path", &Parameter::setOutputPath) .def("set_output_prefix", &Parameter::setOutputPrefix) - .def("set_f_name", &Parameter::setFName) + .def("set_print_files", &Parameter::setOutflowPressureCorrectionFactor) .def("set_print_files", &Parameter::setPrintFiles) .def("set_temperature_init", &Parameter::setTemperatureInit) .def("set_temperature_BC", &Parameter::setTemperatureBC) - .def("set_viscosity", &Parameter::setViscosity) - .def("set_velocity", &Parameter::setVelocity) + .def("set_viscosity_LB", &Parameter::setViscosityLB) + .def("set_velocity_LB", &Parameter::setVelocityLB) .def("set_viscosity_ratio", 
&Parameter::setViscosityRatio) .def("set_velocity_ratio", &Parameter::setVelocityRatio) .def("set_density_ratio", &Parameter::setDensityRatio) .def("set_devices", &Parameter::setDevices) .def("set_is_body_force", &Parameter::setIsBodyForce) - .def("set_use_AMD", &Parameter::setUseAMD) - .def("set_use_Wale", &Parameter::setUseWale) - .def("set_SGS_constant", &Parameter::setSGSConstant) .def("set_main_kernel", &Parameter::setMainKernel) .def("set_AD_kernel", &Parameter::setADKernel) - .def("set_use_AMD", &Parameter::setUseAMD) - .def("set_use_Wale", &Parameter::setUseWale) - .def("set_SGS_constant", &Parameter::setSGSConstant) + .def("set_has_wall_monitor", &Parameter::setHasWallModelMonitor) + .def("set_outflow_pressure_correction_factor", &Parameter::setOutflowPressureCorrectionFactor) .def("set_initial_condition", [](Parameter ¶, std::function<std::vector<float>(real, real, real)> &init_func) { para.setInitialCondition([init_func](real coordX, real coordY, real coordZ, real& rho, real& vx, real& vy, real& vz) @@ -60,6 +61,43 @@ namespace parameter vz = values[3]; }); }) + .def("set_initial_condition_uniform", [](Parameter ¶, real velocity_x, real velocity_y, real velocity_z) + { + para.setInitialCondition([velocity_x, velocity_y, velocity_z](real coordX, real coordY, real coordZ, real& rho, real& vx, real& vy, real& vz) // must capture values explicitly! + { + rho = c0o1; + vx = velocity_x; + vy = velocity_y; + vz = velocity_z; + }); + }) + .def("set_initial_condition_log_law", [](Parameter ¶, real u_star, real z0, real velocityRatio) + { + para.setInitialCondition( + [u_star, z0, velocityRatio](real coordX, real coordY, real coordZ, real& rho, real& vx, real& vy, real& vz) + { + coordZ = coordZ > c0o1 ? 
coordZ : c0o1; + + rho = c0o1; + vx = u_star/c4o10 * log(coordZ/z0+c1o1) / velocityRatio; + vy = c0o1; + vz = c0o1; + } + ); + }) + .def("set_initial_condition_perturbed_log_law", [](Parameter ¶, real u_star, real z0, real L_x, real L_z, real H, real velocityRatio) + { + para.setInitialCondition( + [u_star, z0, L_x, L_z, H, velocityRatio](real coordX, real coordY, real coordZ, real& rho, real& vx, real& vy, real& vz) + { + coordZ = coordZ > c0o1 ? coordZ : c0o1; + rho = c0o1; + vx = (u_star/c4o10 * log(coordZ/z0+c1o1) + c2o1*sin(cPi*c16o1*coordX/L_x)*sin(cPi*c8o1*coordZ/H)/(pow(coordZ/H,c2o1)+c1o1)) / velocityRatio; + vy = c2o1*sin(cPi*c16o1*coordX/L_x)*sin(cPi*c8o1*coordZ/H)/(pow(coordZ/H,c2o1)+c1o1) / velocityRatio; + vz = c8o1*u_star/c4o10*(sin(cPi*c8o1*coordY/H)*sin(cPi*c8o1*coordZ/H)+sin(cPi*c8o1*coordX/L_x))/(pow(c1o2*L_z-coordZ, c2o1)+c1o1) / velocityRatio; + } + ); + }) .def("add_actuator", &Parameter::addActuator) .def("add_probe", &Parameter::addProbe) .def("get_output_path", &Parameter::getOutputPath) @@ -70,11 +108,10 @@ namespace parameter .def("get_viscosity_ratio", &Parameter::getViscosityRatio) .def("get_density_ratio", &Parameter::getDensityRatio) .def("get_force_ratio", &Parameter::getForceRatio) - .def("get_use_AMD", &Parameter::getUseAMD) - .def("get_use_Wale", &Parameter::getUseWale) .def("get_SGS_constant", &Parameter::getSGSConstant) .def("get_is_body_force", &Parameter::getIsBodyForce) .def("set_has_wall_model_monitor", &Parameter::setHasWallModelMonitor) ; + } } \ No newline at end of file diff --git a/pythonbindings/src/gpu/submodules/precursor_writer.cpp b/pythonbindings/src/gpu/submodules/precursor_writer.cpp new file mode 100644 index 0000000000000000000000000000000000000000..0e45e65d4d81246d47a76cf19bc14c74ec17a4af --- /dev/null +++ b/pythonbindings/src/gpu/submodules/precursor_writer.cpp @@ -0,0 +1,35 @@ +#include <pybind11/pybind11.h> +#include <pybind11/stl.h> +#include <pybind11/numpy.h> +#include 
<gpu/VirtualFluids_GPU/PreCollisionInteractor/PreCollisionInteractor.h> +#include <gpu/VirtualFluids_GPU/PreCollisionInteractor/PrecursorWriter.h> + +namespace precursor_writer +{ + namespace py = pybind11; + + void makeModule(py::module_ &parentModule) + { + py::enum_<OutputVariable>(parentModule, "OutputVariable") + .value("Velocities", OutputVariable::Velocities) + .value("Distributions", OutputVariable::Distributions); + + py::class_<PrecursorWriter, PreCollisionInteractor, std::shared_ptr<PrecursorWriter>>(parentModule, "PrecursorWriter") + .def(py::init < std::string, + std::string, + real, + real, real, + real, real, + uint, uint, + OutputVariable, + uint>(), + "filename" + "output_path", + "x_pos", + "y_min", "y_max", + "z_min", "z_max", + "t_start_out", "t_save", + "output_variable", + "max_timesteps_per_file"); + } +} \ No newline at end of file diff --git a/pythonbindings/src/gpu/submodules/simulation.cpp b/pythonbindings/src/gpu/submodules/simulation.cpp index b775d604ba41530223f22738c72785b2c15348b3..88716ebb38f765a94ae4c6c42e27eae4c93e1adb 100644 --- a/pythonbindings/src/gpu/submodules/simulation.cpp +++ b/pythonbindings/src/gpu/submodules/simulation.cpp @@ -8,6 +8,8 @@ #include <gpu/VirtualFluids_GPU/GPU/CudaMemoryManager.h> #include <gpu/VirtualFluids_GPU/DataStructureInitializer/GridProvider.h> #include <gpu/VirtualFluids_GPU/Output/DataWriter.h> +#include "gpu/VirtualFluids_GPU/BoundaryConditions/BoundaryConditionFactory.h" +#include "gpu/VirtualFluids_GPU/TurbulenceModels/TurbulenceModelFactory.h" namespace simulation { @@ -20,11 +22,25 @@ namespace simulation .def(py::init< std::shared_ptr<Parameter>, std::shared_ptr<CudaMemoryManager>, vf::gpu::Communicator &, - GridProvider &>(), + GridProvider &, + BoundaryConditionFactory*>(), "parameter", "memoryManager", "communicator", - "gridProvider") + "gridProvider", + "bcFactory") + .def(py::init< std::shared_ptr<Parameter>, + std::shared_ptr<CudaMemoryManager>, + vf::gpu::Communicator &, + 
GridProvider &, + BoundaryConditionFactory*, + std::shared_ptr<TurbulenceModelFactory>>(), + "parameter", + "memoryManager", + "communicator", + "gridProvider", + "bcFactory", + "tmFactory") .def("run", &Simulation::run) .def("addKineticEnergyAnalyzer", &Simulation::addKineticEnergyAnalyzer) .def("addEnstrophyAnalyzer", &Simulation::addEnstrophyAnalyzer); diff --git a/pythonbindings/src/gpu/submodules/turbulence_models.cpp b/pythonbindings/src/gpu/submodules/turbulence_models.cpp new file mode 100644 index 0000000000000000000000000000000000000000..be9173c0ec206cecb9f602276d9234a2e9064372 --- /dev/null +++ b/pythonbindings/src/gpu/submodules/turbulence_models.cpp @@ -0,0 +1,24 @@ +#include "pybind11/pybind11.h" +#include "gpu/VirtualFluids_GPU/TurbulenceModels/TurbulenceModelFactory.h" +#include "gpu/VirtualFluids_GPU/LBM/LB.h" + +namespace turbulence_model +{ + namespace py = pybind11; + + void makeModule(py::module_ &parentModule) + { + py::enum_<TurbulenceModel>(parentModule, "TurbulenceModel") + .value("Smagorinsky", TurbulenceModel::Smagorinsky) + .value("AMD", TurbulenceModel::AMD) + .value("QR", TurbulenceModel::QR) + .value("None", TurbulenceModel::None); + + py::class_<TurbulenceModelFactory, std::shared_ptr<TurbulenceModelFactory>>(parentModule, "TurbulenceModelFactory") + .def(py::init< std::shared_ptr<Parameter>>(), "para") + .def("set_turbulence_model", &TurbulenceModelFactory::setTurbulenceModel) + .def("set_model_constant", &TurbulenceModelFactory::setModelConstant) + .def("read_config_file", &TurbulenceModelFactory::readConfigFile); + + } +} \ No newline at end of file diff --git a/pythonbindings/src/gpu/submodules/velocity_setter.cpp b/pythonbindings/src/gpu/submodules/velocity_setter.cpp new file mode 100644 index 0000000000000000000000000000000000000000..17114961b5b01e8a6e52144e3a89307708cc5a0b --- /dev/null +++ b/pythonbindings/src/gpu/submodules/velocity_setter.cpp @@ -0,0 +1,20 @@ +#include <pybind11/pybind11.h> +#include 
<gpu/GridGenerator/VelocitySetter/VelocitySetter.h> + +namespace velocity_setter +{ + namespace py = pybind11; + + void makeModule(py::module_ &parentModule) + { + py::enum_<FileType>(parentModule, "FileType") + .value("VTK", FileType::VTK); + + parentModule.def("create_file_collection", &createFileCollection); + + py::class_<VelocityFileCollection, std::shared_ptr<VelocityFileCollection>>(parentModule, "VelocityFileCollection"); + + py::class_<VTKFileCollection, VelocityFileCollection, std::shared_ptr<VTKFileCollection>>(parentModule, "VTKFileCollection") + .def(py::init <std::string>(), "prefix"); + } +} \ No newline at end of file diff --git a/setup.cfg b/setup.cfg new file mode 100644 index 0000000000000000000000000000000000000000..cf060397a1b4e9b38d16683d0ff98ee9532e4a7e --- /dev/null +++ b/setup.cfg @@ -0,0 +1,21 @@ +[metadata] +name = pyfluids +description = Python binding for VirtualFluids +long_description = file: README.md +long_description_content_type = text/markdown +platforms = any +url = https://git.rz.tu-bs.de/irmb/virtualfluids +version = 0.0.1 + +[options] +zip_safe = False +packages = find: +package_dir = + =pythonbindings +python_requires = >=3.6 +install_requires = + cmake-build-extension + +[options.packages.find] +where = pythonbindings + diff --git a/setup.py b/setup.py index b26e1c13d09447d17f8e9fd6e2cd0d0671595bf3..b5cd97056bd4696df8cfd1df1794f3689c23376f 100644 --- a/setup.py +++ b/setup.py @@ -1,137 +1,54 @@ -import os -import re +import inspect import sys -import platform -import subprocess +from pathlib import Path -from setuptools import setup, Extension -from setuptools.command.build_ext import build_ext -from setuptools.command.install import install -from setuptools.command.develop import develop -from distutils.version import LooseVersion +import cmake_build_extension +import setuptools """ Install python wrapper of virtual fluids -Install GPU backend with option --GPU -(pass to pip via --install-option="--GPU") +install via 
python: + python setup.py install build_ext + set CMAKE Flags via -DBUILD_VF_GPU:BOOL=1 +or install via pip: + pip install -e . + set CMAKE Flags via --configure-settings -DBUILD_VF_GPU=1 """ -vf_cmake_args = [ - "-DBUILD_VF_PYTHON_BINDINGS=ON", - "-DCMAKE_CXX_COMPILER_LAUNCHER=ccache", - "-DCMAKE_CUDA_COMPILER_LAUNCHER=ccache", - "-DCMAKE_C_COMPILER_LAUNCHER=ccache", - "-DBUILD_SHARED_LIBS=OFF", - "-DBUILD_WARNINGS_AS_ERRORS=OFF" -] - -vf_cpu_cmake_args = [ - "-DBUILD_VF_DOUBLE_ACCURACY=ON", - "-DBUILD_VF_CPU:BOOL=ON", - "-DBUILD_VF_UNIT_TESTS:BOOL=ON", - "-DUSE_METIS=ON", - "-DUSE_MPI=ON" -] - -vf_gpu_cmake_args = [ - "-DBUILD_VF_DOUBLE_ACCURACY=OFF", - "-DBUILD_VF_GPU:BOOL=ON", - "-DBUILD_VF_UNIT_TESTS:BOOL=OFF", -] - -GPU = False - -class CommandMixin: - user_options = [ - ('GPU', None, 'compile pyfluids with GPU backend'), - ] - - def initialize_options(self): - super().initialize_options() - self.GPU = False - - def finalize_options(self): - super().finalize_options() - - def run(self): - global GPU - GPU = GPU or self.GPU - super().run() - - -class InstallCommand(CommandMixin, install): - user_options = getattr(install, 'user_options', []) + CommandMixin.user_options - - -class DevelopCommand(CommandMixin, develop): - user_options = getattr(develop, 'user_options', []) + CommandMixin.user_options - - -class CMakeExtension(Extension): - def __init__(self, name, sourcedir=''): - Extension.__init__(self, name, sources=[]) - self.sourcedir = os.path.abspath(sourcedir) - - -class CMakeBuild(CommandMixin, build_ext): - user_options = getattr(build_ext, 'user_options', []) + CommandMixin.user_options - - def run(self): - super().run() - try: - out = subprocess.check_output(['cmake', '--version']) - except OSError: - raise RuntimeError("CMake must be installed to build the following extensions: " + - ", ".join(e.name for e in self.extensions)) - - if platform.system() == "Windows": - cmake_version = LooseVersion(re.search(r'version\s*([\d.]+)', 
out.decode()).group(1)) - if cmake_version < '3.1.0': - raise RuntimeError("CMake >= 3.1.0 is required on Windows") - - for ext in self.extensions: - self.build_extension(ext) - - def build_extension(self, ext): - extdir = os.path.abspath(os.path.dirname(self.get_ext_fullpath(ext.name))) - # required for auto-detection of auxiliary "native" libs - if not extdir.endswith(os.path.sep): - extdir += os.path.sep - - cmake_args = ['-DCMAKE_LIBRARY_OUTPUT_DIRECTORY=' + extdir, - '-DPYTHON_EXECUTABLE=' + sys.executable] - - cfg = 'Debug' if self.debug else 'Release' - build_args = ['--config', cfg] - - if platform.system() == "Windows": - cmake_args += ['-DCMAKE_LIBRARY_OUTPUT_DIRECTORY_{}={}'.format(cfg.upper(), extdir)] - if sys.maxsize > 2**32: - cmake_args += ['-A', 'x64'] - build_args += ['--', '/m'] - else: - cmake_args += ['-DCMAKE_BUILD_TYPE=' + cfg] - build_args += ['--', '-j2'] - - cmake_args.extend(vf_cmake_args) - cmake_args.extend(vf_gpu_cmake_args if GPU else vf_cpu_cmake_args) - - env = os.environ.copy() - env['CXXFLAGS'] = '{} -DVERSION_INFO=\\"{}\\"'.format(env.get('CXXFLAGS', ''), - self.distribution.get_version()) - if not os.path.exists(self.build_temp): - os.makedirs(self.build_temp) - cmake_cache_file = self.build_temp+"/CMakeCache.txt" - if os.path.exists(cmake_cache_file): - os.remove(cmake_cache_file) - subprocess.check_call(['cmake', ext.sourcedir] + cmake_args, cwd=self.build_temp, env=env) - subprocess.check_call(['cmake', '--build', '.'] + build_args, cwd=self.build_temp) - +init_py = inspect.cleandoc( + """ + import cmake_build_extension + with cmake_build_extension.build_extension_env(): + from .bindings import * + """ +) -setup( - name='pyfluids', - version='0.0.1', - ext_modules=[CMakeExtension('pyfluids')], - cmdclass={"install": InstallCommand, "develop": DevelopCommand, "build_ext": CMakeBuild}, - zip_safe=False, +extra_args = [] +if("cmake_args" in locals()): + extra_args.extend([f"{k}={v}" for k,v in locals()["cmake_args"].items()]) + 
+setuptools.setup( + ext_modules=[ + cmake_build_extension.CMakeExtension( + name="pyfluids", + install_prefix="pyfluids", + write_top_level_init=init_py, + source_dir=str(Path(__file__).parent.absolute()), + cmake_configure_options = [ + f"-DPython3_ROOT_DIR={Path(sys.prefix)}", + "-DCALL_FROM_SETUP_PY:BOOL=ON", + "-DBUILD_VF_PYTHON_BINDINGS=ON", + "-DCMAKE_CXX_COMPILER_LAUNCHER=ccache", + "-DCMAKE_CUDA_COMPILER_LAUNCHER=ccache", + "-DCMAKE_C_COMPILER_LAUNCHER=ccache", + "-DBUILD_SHARED_LIBS=OFF", + "-DBUILD_VF_DOUBLE_ACCURACY=OFF", + "-DBUILD_VF_UNIT_TESTS:BOOL=OFF", + "-DBUILD_WARNINGS_AS_ERRORS=OFF", + ] + extra_args, + ) + ], + cmdclass=dict( + build_ext=cmake_build_extension.BuildExtension, + ), ) diff --git a/src/basics/basics/utilities/UbTuple.h b/src/basics/basics/utilities/UbTuple.h index fe9c787cead38621beafab3d082122277bdcff73..228ab48898e5e61777d2fcc0061eb6f0434d5cad 100644 --- a/src/basics/basics/utilities/UbTuple.h +++ b/src/basics/basics/utilities/UbTuple.h @@ -597,6 +597,8 @@ inline UbTuple<T1, T2, T3, T4, T5, T6, T7, T8> makeUbTuple(T1 const &a1, T2 cons // some typedefs using UbTupleFloat2 = UbTuple<float, float>; using UbTupleFloat3 = UbTuple<float, float, float>; +using UbTupleFloat4 = UbTuple<float, float, float, float>; +using UbTupleFloat6 = UbTuple<float, float, float,float, float, float>; using UbTupleInt2 = UbTuple<int, int>; using UbTupleInt3 = UbTuple<int, int, int>; using UbTupleInt4 = UbTuple<int, int, int, int>; diff --git a/src/basics/basics/writer/WbWriterVtkXmlImageBinary.cpp b/src/basics/basics/writer/WbWriterVtkXmlImageBinary.cpp new file mode 100644 index 0000000000000000000000000000000000000000..3d77af8747fbdae366c372749be6014033797501 --- /dev/null +++ b/src/basics/basics/writer/WbWriterVtkXmlImageBinary.cpp @@ -0,0 +1,360 @@ +//======================================================================================= +// ____ ____ __ ______ __________ __ __ __ __ +// \ \ | | | | | _ \ |___ ___| | | | | / \ | | +// \ \ | | | | | 
|_) | | | | | | | / \ | | +// \ \ | | | | | _ / | | | | | | / /\ \ | | +// \ \ | | | | | | \ \ | | | \__/ | / ____ \ | |____ +// \ \ | | |__| |__| \__\ |__| \________/ /__/ \__\ |_______| +// \ \ | | ________________________________________________________________ +// \ \ | | | ______________________________________________________________| +// \ \| | | | __ __ __ __ ______ _______ +// \ | | |_____ | | | | | | | | | _ \ / _____) +// \ | | _____| | | | | | | | | | | \ \ \_______ +// \ | | | | |_____ | \_/ | | | | |_/ / _____ | +// \ _____| |__| |________| \_______/ |__| |______/ (_______/ +// +// This file is part of VirtualFluids. VirtualFluids is free software: you can +// redistribute it and/or modify it under the terms of the GNU General Public +// License as published by the Free Software Foundation, either version 3 of +// the License, or (at your option) any later version. +// +// VirtualFluids is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// for more details. +// +// You should have received a copy of the GNU General Public License along +// with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>. +// +//! \file WbWriterVtkXmlImageBinary.cpp +//! \ingroup writer +//! 
\author Soeren Freudiger, Sebastian Geller +//======================================================================================= +#include <basics/utilities/UbLogger.h> +#include <basics/writer/WbWriterVtkXmlImageBinary.h> +#include <cstring> + +using namespace std; + +/*===============================================================================*/ +const std::string WbWriterVtkXmlImageBinary::pvdEndTag = " </Collection>\n</VTKFile>"; +/*===============================================================================*/ +string WbWriterVtkXmlImageBinary::writeCollection(const string &filename, const vector<string> &filenames, + const double &timeStep, const bool &sepGroups) +{ + string vtkfilename = filename + ".pvd"; + ofstream out(vtkfilename.c_str()); + if (!out) { + out.clear(); // flags ruecksetzen (ansonsten liefert utern if(!out) weiterhin true!!! + string path = UbSystem::getPathFromString(vtkfilename); + if (path.size() > 0) { + UbSystem::makeDirectory(path); + out.open(vtkfilename.c_str()); + } + if (!out) + throw UbException(UB_EXARGS, "couldn't open file " + vtkfilename); + } + + string endian; + if (UbSystem::isLittleEndian()) + endian = "LittleEndian"; + else + endian = "BigEndian"; + out << "<VTKFile type=\"Collection\" version=\"0.1\" byte_order=\"" << endian << "\" >" << endl; + out << " <Collection>" << endl; + + int group = 0, part = 0; + for (size_t i = 0; i < filenames.size(); i++) { + out << " <DataSet timestep=\"" << timeStep << "\" group=\"" << group << "\" part=\"" << part + << "\" file=\"" << filenames[i] << "\"/>" << endl; + if (sepGroups) + group++; + else + part++; + } + out << pvdEndTag; + out.close(); + + return vtkfilename; +} +/*===============================================================================*/ +string WbWriterVtkXmlImageBinary::addFilesToCollection(const string &filename, const vector<string> &filenames, + const double &timeStep, const bool &sepGroups) +{ + string vtkfilename = filename; + fstream 
test(vtkfilename.c_str(), ios::in); + if (!test) { + test.clear(); + vtkfilename += ".pvd"; + test.open(vtkfilename.c_str(), ios::in); + if (!test) + return this->writeCollection(filename, filenames, timeStep, sepGroups); + } + + fstream out(vtkfilename.c_str(), ios::in | ios::out); + out.seekp(-(int)pvdEndTag.size() - 1, ios_base::end); + + int group = 0; + for (size_t i = 0; i < filenames.size(); i++) { + out << " <DataSet timestep=\"" << timeStep << "\" group=\"" << group << "\" part=\"" << i << "\" file=\"" + << filenames[i] << "\"/>" << endl; + if (sepGroups) + group++; + } + out << pvdEndTag; + + return vtkfilename; +} +/*===============================================================================*/ +string WbWriterVtkXmlImageBinary::writeParallelFile(const string &filename, const UbTupleInt6 &wholeExtent, const UbTupleFloat3 &origin, const UbTupleFloat3 &spacing, + vector<string> &pieceSources, vector<UbTupleInt6> &pieceExtents, + vector<string> &pointDataNames, vector<string> &cellDataNames) +{ + string vtkfilename = filename + ".pvti"; + UBLOG(logDEBUG1, "WbWriterVtkXmlImageBinary::writeParallelFile to " << vtkfilename << " - start"); + + ofstream out(vtkfilename.c_str()); + if (!out) { + out.clear(); // flags ruecksetzen (ansonsten liefert utern if(!out) weiterhin true!!! 
+ string path = UbSystem::getPathFromString(vtkfilename); + if (path.size() > 0) { + UbSystem::makeDirectory(path); + out.open(vtkfilename.c_str()); + } + if (!out) + throw UbException(UB_EXARGS, "couldn't open file " + vtkfilename); + } + + // VTK FILE + out << "<VTKFile type=\"PImageData\" version=\"0.1\" byte_order=\"LittleEndian\">" + << "\n"; + out << " <PImageData " + << "WholeExtent=\"" << val<1>(wholeExtent) << " " + << val<2>(wholeExtent) << " " + << val<3>(wholeExtent) << " " + << val<4>(wholeExtent) << " " + << val<5>(wholeExtent) << " " + << val<6>(wholeExtent) << "\" " + << "GhostLevel=\"0\" " + << "Origin=\"" << val<1>(origin) << " " + << val<2>(origin) << " " + << val<3>(origin) << "\" " + << "Spacing=\"" << val<1>(spacing) << " " + << val<2>(spacing) << " " + << val<3>(spacing) << "\" " + << "> \n"; + out << " <PPointData>\n"; + for (size_t s = 0; s < pointDataNames.size(); s++) + out << " <PDataArray type=\"Float32\" Name=\"" << pointDataNames[s] << "\"/>\n"; + out << " </PPointData>\n"; + if (cellDataNames.size() > 0) { + out << " <PCellData>\n"; + for (size_t s = 0; s < cellDataNames.size(); s++) + out << " <PDataArray type=\"Float32\" Name=\"" << cellDataNames[s] << "\"/>\n"; + out << " </PCellData>\n"; + } + for (size_t s = 0; s < pieceSources.size(); s++) + out << " <Piece Extent=\"" << val<1>(pieceExtents[s]) << " " + << val<2>(pieceExtents[s]) << " " + << val<3>(pieceExtents[s]) << " " + << val<4>(pieceExtents[s]) << " " + << val<5>(pieceExtents[s]) << " " + << val<6>(pieceExtents[s]) << "\" Source=\"" << pieceSources[s] << "\"/>\n"; + out << " </PImageData>\n"; + out << "</VTKFile>"; + out << endl; + out.close(); + UBLOG(logDEBUG1, "WbWriterVtkXmlImageBinary::writeParallelFile to " << vtkfilename << " - end"); + + return vtkfilename; +} +/*===============================================================================*/ +string WbWriterVtkXmlImageBinary::writeOctsWithCellData(const string &filename, vector<UbTupleFloat3> &nodes, + 
vector<UbTupleInt8> &cells, vector<string> &datanames, + vector<vector<double>> &celldata) +{ + string vtkfilename = filename + getFileExtension(); + UBLOG(logDEBUG1, "WbWriterVtkXmlImageBinary::writeOctsWithCellData to " << vtkfilename << " - start"); + + vector<string> nodeDataNames; + vector<vector<double>> nodedata; + + UbTupleFloat3 origin, spacing; + UbTupleInt6 extent; + + getMetaDataOfImage(nodes, origin, spacing, extent); + + this->writeData(vtkfilename, nodeDataNames, datanames, nodedata, celldata, extent, origin, spacing, extent); + UBLOG(logDEBUG1, "WbWriterVtkXmlImageBinary::writeOctsWithCellData to " << vtkfilename << " - end"); + + return vtkfilename; +} +/*===============================================================================*/ +string WbWriterVtkXmlImageBinary::writeOctsWithNodeData(const string &filename, vector<UbTupleFloat3> &nodes, + vector<UbTupleUInt8> &cells, vector<string> &datanames, + vector<vector<double>> &nodedata) +{ + string vtkfilename = filename + getFileExtension(); + UBLOG(logDEBUG1, "WbWriterVtkXmlImageBinary::writeOctsWithNodeData to " << vtkfilename << " - start"); + + vector<string> cellDataNames; + vector<vector<double>> cellData; + + UbTupleFloat3 origin, spacing; + UbTupleInt6 extent; + + getMetaDataOfImage(nodes, origin, spacing, extent); + + this->writeData(vtkfilename, datanames, cellDataNames, nodedata, cellData, extent, origin, spacing, extent); + + UBLOG(logDEBUG1, "WbWriterVtkXmlImageBinary::writeOctsWithNodeData to " << vtkfilename << " - end"); + + return vtkfilename; +} +/*===============================================================================*/ +string WbWriterVtkXmlImageBinary::writeNodesWithNodeData(const string &filename, vector<UbTupleFloat3> &nodes, + vector<string> &datanames, + vector<vector<double>> &nodedata) +{ + string vtkfilename = filename + getFileExtension(); + UBLOG(logDEBUG1, "WbWriterVtkXmlImageBinary::writeNodesWithNodeData to " << vtkfilename << " - start"); + + 
//! Derives origin, spacing and extent of a regular image from a list of node coordinates.
//! \param nodes   node coordinates; the first entry is taken as origin and the last entry as the
//!                opposite corner (assumes a regular, x-fastest ordering - TODO confirm with callers)
//! \param origin  out: coordinates of nodes[0]
//! \param spacing out: node distance per direction
//! \param extent  out: (min, max) index per direction, computed as coordinate / spacing
//! NOTE(review): nodes[0], nodes[1] and nodes[nofNodes-1] are accessed unconditionally, so fewer than
//! two nodes is not handled here.
void WbWriterVtkXmlImageBinary::getMetaDataOfImage(vector<UbTupleFloat3> &nodes, UbTupleFloat3& origin, UbTupleFloat3& spacing, UbTupleInt6& extent)
{
    int nofNodes = (int)nodes.size();
    // the first node defines the origin
    val<1>(origin) = val<1>(nodes[0]);
    val<2>(origin) = val<2>(nodes[0]);
    val<3>(origin) = val<3>(nodes[0]);

    // edge lengths in x and y between first and last node
    float l_x = val<1>(nodes[nofNodes-1])-val<1>(origin);
    float l_y = val<2>(nodes[nofNodes-1])-val<2>(origin);

    // x spacing from the first two nodes
    val<1>(spacing) = val<1>(nodes[1])-val<1>(nodes[0]);
    // float division truncated to int
    // NOTE(review): this is the number of intervals (l_x / dx), not the number of nodes per row - confirm
    // that nodes[nx] really is the first node with a different y coordinate.
    int nx = (l_x) / val<1>(spacing);
    val<2>(spacing) = val<2>(nodes[nx])-val<2>(nodes[0]);
    int ny = (l_y) / val<2>(spacing);
    val<3>(spacing) = val<3>(nodes[nx*ny])-val<3>(nodes[0]);

    // NOTE(review): a spacing of zero (e.g. a single plane of nodes) leads to a division by zero below.
    val<1>(extent) = val<1>(origin)/val<1>(spacing); val<2>(extent) = val<1>(nodes[nofNodes-1])/val<1>(spacing);
    val<3>(extent) = val<2>(origin)/val<2>(spacing); val<4>(extent) = val<2>(nodes[nofNodes-1])/val<2>(spacing);
    val<5>(extent) = val<3>(origin)/val<3>(spacing); val<6>(extent) = val<3>(nodes[nofNodes-1])/val<3>(spacing);

}
+ string path = UbSystem::getPathFromString(vtkfilename); + if (path.size() > 0) { + UbSystem::makeDirectory(path); + out.open(vtkfilename.c_str(), ios::out | ios::binary); + } + if (!out) + throw UbException(UB_EXARGS, "couldn't open file " + vtkfilename); + } + + size_t nPoints = pointDataNames.size()>0 ? nodedata[0].size() : celldata[0].size(); + + int bytesPerByteVal = 4; //==sizeof(int) + + int bytesScalarData = 1 /*scalar */ * (int)nPoints * sizeof(double); + + int offset = 0; + + // VTK FILE + out << "<?xml version=\"1.0\"?>\n"; + out << "<VTKFile type=\"ImageData\" version=\"0.1\" byte_order=\"LittleEndian\" >" + << "\n"; + out << " <ImageData " + << "WholeExtent=\"" << val<1>(wholeExtent) << " " + << val<2>(wholeExtent) << " " + << val<3>(wholeExtent) << " " + << val<4>(wholeExtent) << " " + << val<5>(wholeExtent) << " " + << val<6>(wholeExtent) << "\" " + << "Origin=\"" << val<1>(origin) << " " + << val<2>(origin) << " " + << val<3>(origin) << "\" " + << "Spacing=\"" << val<1>(spacing) << " " + << val<2>(spacing) << " " + << val<3>(spacing) << "\"" + << "> \n"; + out << " <Piece Extent=\"" << val<1>(extent) << " " + << val<2>(extent) << " " + << val<3>(extent) << " " + << val<4>(extent) << " " + << val<5>(extent) << " " + << val<6>(extent) << "\">\n"; + + // DATA SECTION + if (pointDataNames.size()>0) + { + out << " <PointData>\n"; + for (size_t s = 0; s < pointDataNames.size(); ++s) { + out << " <DataArray type=\"Float64\" Name=\"" << pointDataNames[s] << "\" format=\"appended\" offset=\"" + << offset << "\" /> \n"; + offset += (bytesPerByteVal + bytesScalarData); + } + out << " </PointData>\n"; + } + + if (cellDataNames.size()>0) + { + out << " <CellData>\n"; + for (size_t s = 0; s < cellDataNames.size(); ++s) { + out << " <DataArray type=\"Float64\" Name=\"" << cellDataNames[s] << "\" format=\"appended\" offset=\"" + << offset << "\" /> \n"; + offset += (bytesPerByteVal + bytesScalarData); + } + out << " </CellData>\n"; + } + + out << " </Piece>\n"; + 
out << " </ImageData>\n"; + + // AppendedData SECTION + out << " <AppendedData encoding=\"raw\">\n"; + out << "_"; + + + // DATA SECTION + // pointData + for (size_t s = 0; s < pointDataNames.size(); ++s) { + out.write((char *)&bytesScalarData, bytesPerByteVal); + for (size_t d = 0; d < nodedata[s].size(); ++d) { + double tmp = nodedata[s][d]; + out.write((char *)&tmp, sizeof(double)); + } + } + + // cellData + for (size_t s = 0; s < cellDataNames.size(); ++s) { + out.write((char *)&bytesScalarData, bytesPerByteVal); + for (size_t d = 0; d < celldata[s].size(); ++d) { + double tmp = celldata[s][d]; + out.write((char *)&tmp, sizeof(double)); + } + } + out << "\n </AppendedData>\n"; + out << "</VTKFile>"; + out << endl; + out.close(); +} \ No newline at end of file diff --git a/src/basics/basics/writer/WbWriterVtkXmlImageBinary.h b/src/basics/basics/writer/WbWriterVtkXmlImageBinary.h new file mode 100644 index 0000000000000000000000000000000000000000..a45b51143accccf47147483dc0034e3ad77ca33d --- /dev/null +++ b/src/basics/basics/writer/WbWriterVtkXmlImageBinary.h @@ -0,0 +1,110 @@ +//======================================================================================= +// ____ ____ __ ______ __________ __ __ __ __ +// \ \ | | | | | _ \ |___ ___| | | | | / \ | | +// \ \ | | | | | |_) | | | | | | | / \ | | +// \ \ | | | | | _ / | | | | | | / /\ \ | | +// \ \ | | | | | | \ \ | | | \__/ | / ____ \ | |____ +// \ \ | | |__| |__| \__\ |__| \________/ /__/ \__\ |_______| +// \ \ | | ________________________________________________________________ +// \ \ | | | ______________________________________________________________| +// \ \| | | | __ __ __ __ ______ _______ +// \ | | |_____ | | | | | | | | | _ \ / _____) +// \ | | _____| | | | | | | | | | | \ \ \_______ +// \ | | | | |_____ | \_/ | | | | |_/ / _____ | +// \ _____| |__| |________| \_______/ |__| |______/ (_______/ +// +// This file is part of VirtualFluids. 
VirtualFluids is free software: you can +// redistribute it and/or modify it under the terms of the GNU General Public +// License as published by the Free Software Foundation, either version 3 of +// the License, or (at your option) any later version. +// +// VirtualFluids is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// for more details. +// +// You should have received a copy of the GNU General Public License along +// with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>. +// +//! \file WbWriterVtkXmlBinary.h +//! \ingroup writer +//! \author Soeren Freudiger, Sebastian Geller +//======================================================================================= +#ifndef WBWRITERVTKXMLIMAGEBINARY_H +#define WBWRITERVTKXMLIMAGEBINARY_H + +#include <string> + +#include <basics/writer/WbWriter.h> + +#include "basics_export.h" + +class BASICS_EXPORT WbWriterVtkXmlImageBinary : public WbWriter +{ +public: + static WbWriterVtkXmlImageBinary *getInstance() + { + static WbWriterVtkXmlImageBinary instance; + return &instance; + } + + WbWriterVtkXmlImageBinary(const WbWriterVtkXmlImageBinary &) = delete; + const WbWriterVtkXmlImageBinary &operator=(const WbWriterVtkXmlImageBinary &) = delete; + +private: + WbWriterVtkXmlImageBinary() : WbWriter() + { + if (sizeof(unsigned char) != 1) + throw UbException(UB_EXARGS, "machine error char type mismatch"); + if (sizeof(int) != 4) + throw UbException(UB_EXARGS, "machine error int type mismatch"); + if (sizeof(float) != 4) + throw UbException(UB_EXARGS, "machine error float type mismatch"); + } + + static const std::string pvdEndTag; + +public: + std::string getFileExtension() override { return ".bin.vti"; } + + // write a metafile + std::string writeCollection(const std::string &filename, const std::vector<std::string> &filenames, + 
const double ×tep, const bool &sepGroups); + std::string addFilesToCollection(const std::string &filename, const std::vector<std::string> &filenames, + const double ×tep, const bool &sepGroups); + std::string writeParallelFile(const std::string &filename, const UbTupleInt6 &wholeExtent, const UbTupleFloat3 &origin, const UbTupleFloat3 &spacing, + std::vector<std::string> &pieceSources, std::vector<UbTupleInt6> &pieceExtents, + std::vector<std::string> &pointDataNames, std::vector<std::string> &cellDataNames); + + ////////////////////////////////////////////////////////////////////////// + // nodes + std::string writeNodesWithNodeData(const std::string &filename, std::vector<UbTupleFloat3> &nodes, + std::vector<std::string> &datanames, + std::vector<std::vector<double>> &nodedata) override; + + ////////////////////////////////////////////////////////////////////////// + // octs + // 7 ---- 6 + // /| /| + // 4 +--- 5 | + // | | | | + // | 3 ---+ 2 + // |/ |/ + // 0 ---- 1 + std::string writeOctsWithCellData(const std::string &filename, std::vector<UbTupleFloat3> &nodes, + std::vector<UbTupleInt8> &cells, std::vector<std::string> &datanames, + std::vector<std::vector<double>> &celldata) override; + std::string writeOctsWithNodeData(const std::string &filename, std::vector<UbTupleFloat3> &nodes, + std::vector<UbTupleUInt8> &cells, std::vector<std::string> &datanames, + std::vector<std::vector<double>> &nodedata) override; + void writeData(const std::string &vtkfilename, + std::vector<std::string> &pointDataNames, std::vector<std::string> &cellDataNames, + std::vector<std::vector<double>> &nodedata, std::vector<std::vector<double>> &celldata, + UbTupleInt6 &wholeExtent, + UbTupleFloat3 &origin, UbTupleFloat3 &spacing, UbTupleInt6 &extent); + +private: + void getMetaDataOfImage(std::vector<UbTupleFloat3> &nodes, UbTupleFloat3& origin, UbTupleFloat3& spacing, UbTupleInt6& extent); +}; + +#endif // WBWRITERVTKXMLIMAGEBINARY_H diff --git 
a/src/basics/config/ConfigurationFile.h b/src/basics/config/ConfigurationFile.h index ef7e7c9f06f94cabb3ba9cbefe95c8ee75736958..4a53f7add85b9c6461fda0bab20fa6656eebc5d3 100644 --- a/src/basics/config/ConfigurationFile.h +++ b/src/basics/config/ConfigurationFile.h @@ -64,6 +64,10 @@ public: template<class T> T getValue(const std::string& key) const; + //! get value with key and default value + template<class T> + T getValue(const std::string& key, T defaultValue) const; + private: //! the container std::map<std::string, std::string> data; @@ -138,6 +142,19 @@ T ConfigurationFile::getValue(const std::string& key) const return x; } +template<class T> +T ConfigurationFile::getValue(const std::string& key, T defaultValue) const +{ + if (contains(key)) + { + return getValue<T>(key); + } + else + { + return defaultValue; + } +} + } #endif diff --git a/src/gpu/GridGenerator/VelocitySetter/VelocitySetter.cpp b/src/gpu/GridGenerator/VelocitySetter/VelocitySetter.cpp new file mode 100644 index 0000000000000000000000000000000000000000..ed1335d2314f6fe4459f711872c1af968b4a600d --- /dev/null +++ b/src/gpu/GridGenerator/VelocitySetter/VelocitySetter.cpp @@ -0,0 +1,425 @@ +#include "VelocitySetter.h" +#include "GridGenerator/grid/Grid.h" +#include "GridGenerator/grid/BoundaryConditions/BoundaryCondition.h" +#include <logger/Logger.h> + + +#include <math.h> +#include <sstream> +#include <fstream> +#include <iostream> +#include <algorithm> + +SPtr<VelocityFileCollection> createFileCollection(std::string prefix, FileType type) +{ + switch(type) + { + case FileType::VTK: + return std::make_shared<VTKFileCollection>(prefix); + break; + default: + return nullptr; + } +} + +SPtr<VelocityReader> createReaderForCollection(SPtr<VelocityFileCollection> fileCollection) +{ + switch(fileCollection->getFileType()) + { + case FileType::VTK: + return std::make_shared<VTKReader>(std::static_pointer_cast<VTKFileCollection>(fileCollection)); + break; + default: + return nullptr; + } +} 
//! Parses a whitespace separated list of numbers, e.g. the value of a VTK XML attribute.
//! \tparam T element type of the result; every token is extracted directly as T
//! \param s  string holding the numbers
//! \return all values that could be extracted; parsing stops at the first token that is not a valid T
template<typename T>
std::vector<T> readStringToVector(std::string s)
{
    std::vector<T> out;
    std::stringstream input(s);
    // extract as T (not as float), otherwise integers above 2^24 are silently rounded
    T num;
    while(input >> num)
    {
        out.push_back(num);
    }
    return out;
}
+= offset; + } + + file.close(); + + this->deltaX = spacing[0]; + this->deltaY = spacing[1]; + this->deltaZ = spacing[2]; + + this->nx = pieceExtent[1]-pieceExtent[0]+1; + this->ny = pieceExtent[3]-pieceExtent[2]+1; + this->nz = pieceExtent[5]-pieceExtent[4]+1; + + this->minX = origin[0]+this->deltaX*pieceExtent[0]; this->maxX = (this->nx-1)*this->deltaX+this->minX; + this->minY = origin[1]+this->deltaY*pieceExtent[2]; this->maxY = (this->ny-1)*this->deltaY+this->minY; + this->minZ = origin[2]+this->deltaZ*pieceExtent[4]; this->maxZ = (this->nz-1)*this->deltaZ+this->minZ; + // printFileInfo(); + +} + +bool VTKFile::markNANs(std::vector<uint> readIndices) +{ + std::ifstream buf(fileName.c_str(), std::ios::in | std::ios::binary); + + std::vector<double> tmp; + tmp.reserve(readIndices.size()); + buf.seekg(this->quantities[0].offset); + buf.read((char*) tmp.data(), sizeof(double)*readIndices.size()); + auto firstNAN = std::find_if(tmp.begin(), tmp.end(), [](auto it){ return isnan(it); }); + + return firstNAN != tmp.end(); +} + +void VTKFile::loadFile() +{ + std::ifstream buf(this->fileName.c_str(), std::ios::in | std::ios::binary); + for(auto& quantity: this->quantities) + { + quantity.values.resize(getNumberOfPoints()); + buf.seekg(quantity.offset); + buf.read(reinterpret_cast<char*>(quantity.values.data()), this->getNumberOfPoints()*sizeof(double)); + } + + buf.close(); + + this->loaded = true; +} + +void VTKFile::unloadFile() +{ + for(auto& quantity : this->quantities) + { + std::vector<double> replacement; + quantity.values.swap(replacement); + } + this->loaded = false; +} + +void VTKFile::getData(real* data, uint numberOfNodes, std::vector<uint> readIndeces, std::vector<uint> writeIndices, uint offsetRead, uint offsetWrite) +{ + if(!this->loaded) loadFile(); + + size_t nPoints = writeIndices.size(); + + for(size_t j=0; j<this->quantities.size(); j++) + { + real* quant = &data[j*numberOfNodes]; + for(size_t i=0; i<nPoints; i++) + { + 
quant[offsetWrite+writeIndices[i]] = this->quantities[j].values[readIndeces[i]+offsetRead]; + } + } +} + +void VTKFile::printFileInfo() +{ + printf("file %s with \n nx %i ny %i nz %i \n origin %f %f %f \n spacing %f %f %f \n", + fileName.c_str(), nx, ny, nz, minX, minY, minZ, deltaX, deltaY, deltaZ); + for(auto quantity: this->quantities) + { + printf("\t quantity %s offset %i \n", quantity.name.c_str(), quantity.offset); + } + +} + + +void VTKFileCollection::findFiles() +{ + bool foundLastLevel = false; + + while(!foundLastLevel) + { + bool foundLastID = false; + std::vector<std::vector<VTKFile>> filesOnThisLevel; + while(!foundLastID) + { + bool foundLastPart = false; + std::vector<VTKFile> filesWithThisId; + while (!foundLastPart) + { + std::string fname = makeFileName((int)files.size(), (int)filesOnThisLevel.size(), (int)filesWithThisId.size()); + std::ifstream f(fname); + if(f.good()) + filesWithThisId.emplace_back(fname); + else + foundLastPart = true; + + } + if(!filesWithThisId.empty()) + filesOnThisLevel.push_back(filesWithThisId); + else foundLastID = true; + } + if(!filesOnThisLevel.empty()) + files.push_back(filesOnThisLevel); + else foundLastLevel = true; + } +} + +void VelocityReader::getNeighbors(uint* neighborNT, uint* neighborNB, uint* neighborST, uint* neighborSB) +{ + std::copy(planeNeighborNT.begin(), planeNeighborNT.end(), &neighborNT[writingOffset]); + std::copy(planeNeighborNB.begin(), planeNeighborNB.end(), &neighborNB[writingOffset]); + std::copy(planeNeighborST.begin(), planeNeighborST.end(), &neighborST[writingOffset]); + std::copy(planeNeighborSB.begin(), planeNeighborSB.end(), &neighborSB[writingOffset]); +} + +void VelocityReader::getWeights(real* _weightsNT, real* _weightsNB, real* _weightsST, real* _weightsSB) +{ + std::copy(weightsNT.begin(), weightsNT.end(), &_weightsNT[writingOffset]); + std::copy(weightsNB.begin(), weightsNB.end(), &_weightsNB[writingOffset]); + std::copy(weightsST.begin(), weightsST.end(), 
&_weightsST[writingOffset]); + std::copy(weightsSB.begin(), weightsSB.end(), &_weightsSB[writingOffset]); +} + + +void VTKReader::initializeIndexVectors() +{ + this->readIndices.resize(this->fileCollection->files.size()); + this->writeIndices.resize(this->fileCollection->files.size()); + this->nFile.resize(this->fileCollection->files.size()); + for(size_t lev=0; lev<this->fileCollection->files.size(); lev++) + { + this->readIndices[lev].resize(this->fileCollection->files[lev].size()); + this->writeIndices[lev].resize(this->fileCollection->files[lev].size()); + this->nFile[lev].resize(this->fileCollection->files[lev].size()); + } +} + +void VTKReader::fillArrays(std::vector<real>& coordsY, std::vector<real>& coordsZ) +{ + this->nPoints = (uint)coordsY.size(); + this->initializeIndexVectors(); + real max_diff = 1e-4; // maximum distance between point on grid and precursor plane to count as exact match + real eps = 1e-7; // small number to avoid division by zero + bool perfect_match = true; + + this->weightsNT.reserve(this->nPoints); + this->weightsNB.reserve(this->nPoints); + this->weightsST.reserve(this->nPoints); + this->weightsSB.reserve(this->nPoints); + + this->planeNeighborNT.reserve(this->nPoints); + this->planeNeighborNB.reserve(this->nPoints); + this->planeNeighborST.reserve(this->nPoints); + this->planeNeighborSB.reserve(this->nPoints); + + for(uint i=0; i<nPoints; i++) + { + + real posY = coordsY[i]; + real posZ = coordsZ[i]; + bool foundNT = false, foundNB = false, foundST = false, foundSB = false, foundAll = false; + + for(int level = (int)this->fileCollection->files.size()-1; level>=0; level--) // go backwards to find finest nodes first + { + for(int fileId=0; fileId<(int)this->fileCollection->files[level].size(); fileId++) + { + VTKFile file = this->fileCollection->files[level][fileId][0]; + if(!file.inBoundingBox(posY, posZ, 0.0f)) continue; + // y in simulation is x in precursor/file, z in simulation is y in precursor/file + // simulation -> file: N 
-> E, S -> W, T -> N, B -> S + int idx = file.findNeighborWSB(posY, posZ, 0.f); + if(idx!=-1) + { + // Filter for exact matches + if(abs(posY-file.getX(idx)) < max_diff && abs(posZ-file.getY(idx)) < max_diff) + { + this->weightsNT.emplace_back(1e6f); + this->weightsNB.emplace_back(0.f); + this->weightsST.emplace_back(0.f); + this->weightsSB.emplace_back(0.f); + uint writeIdx = this->getWriteIndex(level, fileId, idx); + this->planeNeighborNT.push_back(writeIdx); + this->planeNeighborNB.push_back(writeIdx); + this->planeNeighborST.push_back(writeIdx); + this->planeNeighborSB.push_back(writeIdx); + foundNT = true; foundNB = true; foundSB = true; foundST = true; + } + else + { + perfect_match = false; + } + + if(!foundSB) + { + foundSB = true; + real dy = file.getX(idx)-posY; + real dz = file.getY(idx)-posZ; + this->weightsSB.emplace_back(1.f/(dy*dy+dz*dz+eps)); + this->planeNeighborSB.emplace_back(getWriteIndex(level, fileId, idx)); + } + + } + + if(!foundNT) //NT in simulation is EN in precursor + { + int idx = file.findNeighborENB(posY, posZ, 0.f); + if(idx!=-1) + { + foundNT = true; + real dy = file.getX(idx)-posY; + real dz = file.getY(idx)-posZ; + this->weightsNT.emplace_back(1.f/(dy*dy+dz*dz+eps)); + this->planeNeighborNT.emplace_back(getWriteIndex(level, fileId, idx)); + } + } + + if(!foundNB) //NB in simulation is ES in precursor + { + int idx = file.findNeighborESB(posY, posZ, 0.f); + if(idx!=-1) + { + foundNB = true; + real dy = file.getX(idx)-posY; + real dz = file.getY(idx)-posZ; + this->weightsNB.emplace_back(1.f/(dy*dy+dz*dz+eps)); + this->planeNeighborNT.emplace_back(getWriteIndex(level, fileId, idx)); + } + } + + if(!foundST) //ST in simulation is WN in precursor + { + int idx = file.findNeighborWNB(posY, posZ, 0.f); + if(idx!=-1) + { + foundST = true; + real dy = file.getX(idx)-posY; + real dz = file.getY(idx)-posZ; + this->weightsST.emplace_back(1.f/(dy*dy+dz*dz+eps)); + this->planeNeighborST.emplace_back(getWriteIndex(level, fileId, idx)); + } + } + 
+ foundAll = foundNT && foundNB && foundST && foundSB; + + if(foundAll) break; + } + if(foundAll) break; + } + + if(!foundAll) + throw std::runtime_error("Did not find neighbors in the VelocityFileCollection for all points"); + } + + if(perfect_match) + printf("Precursor was a perfect match \n"); + + + for(size_t level=0; level<this->fileCollection->files.size(); level++){ + for(size_t id=0; id<this->fileCollection->files[level].size(); id++){ + if(this->fileCollection->files[level][id][0].markNANs(this->readIndices[level][id])) + throw std::runtime_error("Found a NAN in the precursor where a velocity is needed"); + }} +} + +uint VTKReader::getWriteIndex(int level, int id, int linearIndex) +{ + auto it = std::find(this->writeIndices[level][id].begin(), this->writeIndices[level][id].end(), linearIndex); + uint idx = it-this->writeIndices[level][id].begin(); + if(it==this->writeIndices[level][id].end()) + { + this->writeIndices[level][id].push_back(this->nPointsRead); + this->readIndices[level][id].push_back(linearIndex); + this->nPointsRead++; + } + return idx; +} + + +void VTKReader::getNextData(real* data, uint numberOfNodes, real time) +{ + for(size_t level=0; level<this->fileCollection->files.size(); level++) + { + for(size_t id=0; id<this->fileCollection->files[level].size(); id++) + { + size_t nF = this->nFile[level][id]; + + + if(!this->fileCollection->files[level][id][nF].inZBounds(time)) + { + nF++; + + printf("switching to precursor file no. %zd\n", nF); + if(nF == this->fileCollection->files[level][id].size()) + throw std::runtime_error("Not enough Precursor Files to read"); + + this->fileCollection->files[level][id][nF-1].unloadFile(); + if(nF+1<this->fileCollection->files[level][id].size()) + { + VTKFile* nextFile = &this->fileCollection->files[level][id][nF+1]; + if(! 
nextFile->isLoaded()) + { + read.wait(); + read = std::async(std::launch::async, [](VTKFile* file){ file->loadFile(); }, &this->fileCollection->files[level][id][nF+1]); + } + } + } + + + VTKFile* file = &this->fileCollection->files[level][id][nF]; + + int off = file->getClosestIdxZ(time)*file->getNumberOfPointsInXYPlane(); + file->getData(data, numberOfNodes, this->readIndices[level][id], this->writeIndices[level][id], off, this->writingOffset); + this->nFile[level][id] = nF; + } + } +} \ No newline at end of file diff --git a/src/gpu/GridGenerator/VelocitySetter/VelocitySetter.h b/src/gpu/GridGenerator/VelocitySetter/VelocitySetter.h new file mode 100644 index 0000000000000000000000000000000000000000..fe8fbbacf51843c599769a58d31c1d8e1fa5b0d6 --- /dev/null +++ b/src/gpu/GridGenerator/VelocitySetter/VelocitySetter.h @@ -0,0 +1,199 @@ +#ifndef VELOCITY_SETTER_H_ +#define VELOCITY_SETTER_H_ + +#include "Core/DataTypes.h" +#include <Core/StringUtilities/StringUtil.h> +#include "PointerDefinitions.h" + +#include <string> +#include <vector> +#include <math.h> +#include <sstream> +#include <future> +class Grid; +namespace gg +{ + class BoundaryCondition; +} + + +enum class FileType +{ + VTK +}; + +struct Quantity +{ + std::string name; + int offset; + std::vector<double> values; +}; + +class VTKFile +{ +public: + VTKFile(std::string _fileName): + fileName(_fileName) + { + readHeader(); + this->loaded = false; + // printFileInfo(); + }; + + void getData(real* data, uint numberOfNodes, std::vector<uint> readIndeces, std::vector<uint> writeIndices, uint offsetRead, uint offsetWrite); + bool markNANs(std::vector<uint> readIndices); + bool inBoundingBox(real posX, real posY, real posZ){return inXBounds(posX) && inYBounds(posY) && inZBounds(posZ); }; + bool inXBounds(real posX){ return posX<=maxX && posX>=minX; }; + bool inYBounds(real posY){ return posY<=maxY && posY>=minY; }; + bool inZBounds(real posZ){ return posZ<=maxZ && posZ>=minZ; }; + int findNeighborWSB(real posX, 
real posY, real posZ){ int idx = getLinearIndex(getIdxWX(posX) , getIdxSY(posY) , getIdxBZ(posZ) ); return (idx>=0) && (idx<nx*ny*nz) ? idx : -1; }; + int findNeighborWST(real posX, real posY, real posZ){ int idx = getLinearIndex(getIdxWX(posX) , getIdxSY(posY) , getIdxBZ(posZ)+1); return (idx>=0) && (idx<nx*ny*nz) ? idx : -1; }; + int findNeighborWNB(real posX, real posY, real posZ){ int idx = getLinearIndex(getIdxWX(posX) , getIdxSY(posY)+1, getIdxBZ(posZ) ); return (idx>=0) && (idx<nx*ny*nz) ? idx : -1; }; + int findNeighborWNT(real posX, real posY, real posZ){ int idx = getLinearIndex(getIdxWX(posX) , getIdxSY(posY)+1, getIdxBZ(posZ)+1); return (idx>=0) && (idx<nx*ny*nz) ? idx : -1; }; + int findNeighborESB(real posX, real posY, real posZ){ int idx = getLinearIndex(getIdxWX(posX)+1, getIdxSY(posY) , getIdxBZ(posZ) ); return (idx>=0) && (idx<nx*ny*nz) ? idx : -1; }; + int findNeighborEST(real posX, real posY, real posZ){ int idx = getLinearIndex(getIdxWX(posX)+1, getIdxSY(posY) , getIdxBZ(posZ)+1); return (idx>=0) && (idx<nx*ny*nz) ? idx : -1; }; + int findNeighborENB(real posX, real posY, real posZ){ int idx = getLinearIndex(getIdxWX(posX)+1, getIdxSY(posY)+1, getIdxBZ(posZ) ); return (idx>=0) && (idx<nx*ny*nz) ? idx : -1; }; + int findNeighborENT(real posX, real posY, real posZ){ int idx = getLinearIndex(getIdxWX(posX)+1, getIdxSY(posY)+1, getIdxBZ(posZ)+1); return (idx>=0) && (idx<nx*ny*nz) ? 
idx : -1; }; + int getIdxX(int linearIdx){ return linearIdx%nx;}; + int getIdxY(int linearIdx){ return (linearIdx/nx)%ny;}; + int getIdxZ(int linearIdx){ return linearIdx/(nx*ny); }; + real getX(int linearIdx){ return getIdxX(linearIdx)*deltaX+minX; }; + real getY(int linearIdx){ return getIdxY(linearIdx)*deltaY+minY; }; + real getZ(int linearIdx){ return getIdxZ(linearIdx)*deltaZ+minZ; }; + int getIdxWX(real posX){ return (posX-minX)/deltaX; }; + int getIdxSY(real posY){ return (posY-minY)/deltaY; }; + int getIdxBZ(real posZ){ return (posZ-minZ)/deltaZ; }; + int getClosestIdxX(real posX){ int x = round((posX-minX)/deltaX); return x>nx ? nx : (x<0 ? 0 : x);}; + int getClosestIdxY(real posY){ int y = round((posY-minY)/deltaY); return y>ny ? ny : (y<0 ? 0 : y);}; + int getClosestIdxZ(real posZ){ int z = round((posZ-minZ)/deltaZ); return z>nz ? nz : (z<0 ? 0 : z);}; + int getLinearIndex(int idxX, int idxY, int idxZ){ return idxX + nx*(idxY+ny*idxZ); }; + int getNumberOfPointsInXYPlane(){ return nx*ny; } + int getNumberOfPointsInYZPlane(){ return ny*nz; } + int getNumberOfPointsInXZPlane(){ return nx*nz; } + int getNumberOfPoints(){ return nx*ny*nz; } + size_t getNumberOfQuantities(){ return quantities.size(); } + void loadFile(); + void unloadFile(); + bool isLoaded(){return loaded;}; + + +private: + void readHeader(); + void printFileInfo(); + +public: + +private: + std::string fileName; + real minX, maxX, minY, maxY, minZ, maxZ; + real deltaX, deltaY, deltaZ; + int nx, ny, nz; + std::vector<Quantity> quantities; + bool loaded; +}; + +class VelocityFileCollection +{ +public: + VelocityFileCollection(std::string _prefix): + prefix(_prefix){}; + + virtual ~VelocityFileCollection() = default; + + virtual size_t getNumberOfQuantities()=0; + + virtual FileType getFileType()=0; + +protected: + std::string prefix; +}; + + +class VTKFileCollection : public VelocityFileCollection +{ +public: + VTKFileCollection(std::string _prefix): + VelocityFileCollection(_prefix) + { + 
findFiles(); + }; + + FileType getFileType(){ return FileType::VTK; }; + size_t getNumberOfQuantities(){ return files[0][0][0].getNumberOfQuantities(); } + + +private: + void findFiles(); + std::string makeFileName(int level, int id, int part) + { + return prefix + "_lev_" + StringUtil::toString<int>(level) + + "_ID_" + StringUtil::toString<int>(id) + + "_File_" + StringUtil::toString<int>(part) + + ".bin." + suffix; + }; + + +public: + static const inline std::string suffix = "vti"; + std::vector<std::vector<std::vector<VTKFile>>> files; +}; + + +class VelocityReader +{ +public: + VelocityReader() + { + this->nPoints = 0; + this->nPointsRead = 0; + this->writingOffset = 0; + }; + virtual ~VelocityReader() = default; + + virtual void getNextData(real* data, uint numberOfNodes, real time)=0; + virtual void fillArrays(std::vector<real>& coordsY, std::vector<real>& coordsZ)=0; + uint getNPoints(){return nPoints; }; + uint getNPointsRead(){return nPointsRead; }; + size_t getNumberOfQuantities(){ return nQuantities; }; + void setWritingOffset(uint offset){ this->writingOffset = offset; } + void getNeighbors(uint* neighborNT, uint* neighborNB, uint* neighborST, uint* neighborSN); + void getWeights(real* _weightsNT, real* _weightsNB, real* _weightsST, real* _weightsSB); + +public: + std::vector<uint> planeNeighborNT, planeNeighborNB, planeNeighborST, planeNeighborSB; + std::vector<real> weightsNT, weightsNB, weightsST, weightsSB; + +protected: + uint nPoints, nPointsRead, writingOffset; + uint nReads=0; + size_t nQuantities=0; +}; + + +class VTKReader : public VelocityReader +{ +public: + VTKReader(SPtr<VTKFileCollection> _fileCollection): + fileCollection(_fileCollection) + { + this->nQuantities = fileCollection->getNumberOfQuantities(); + read = std::async([](){}); + }; + void getNextData(real* data, uint numberOfNodes, real time) override; + void fillArrays(std::vector<real>& coordsY, std::vector<real>& coordsZ) override; +private: + uint getWriteIndex(int level, int 
id, int linearIdx); + void initializeIndexVectors(); + +private: + std::vector<std::vector<std::vector<uint>>> readIndices, writeIndices; + std::vector<std::vector<size_t>> nFile; + SPtr<VTKFileCollection> fileCollection; + std::future<void> read; +}; + + +SPtr<VelocityFileCollection> createFileCollection(std::string prefix, FileType type); +SPtr<VelocityReader> createReaderForCollection(SPtr<VelocityFileCollection> fileCollection); + +#endif //VELOCITY_SETTER_H_ \ No newline at end of file diff --git a/src/gpu/GridGenerator/grid/BoundaryConditions/BoundaryCondition.cpp b/src/gpu/GridGenerator/grid/BoundaryConditions/BoundaryCondition.cpp index 5102f60fc295aadf4323a4b332bf3dd8f7f21dbf..a0cc56b08d3e04ab3c04ed7ddca017336abf3c21 100644 --- a/src/gpu/GridGenerator/grid/BoundaryConditions/BoundaryCondition.cpp +++ b/src/gpu/GridGenerator/grid/BoundaryConditions/BoundaryCondition.cpp @@ -36,12 +36,12 @@ #include "grid/BoundaryConditions/Side.h" #include "grid/Grid.h" +#include "GridGenerator/VelocitySetter/VelocitySetter.h" bool gg::BoundaryCondition::isSide( SideType side ) const { return this->side->whoAmI() == side; } - ////////////////////////////////////////////////////////////////////////// void VelocityBoundaryCondition::setVelocityProfile( @@ -124,5 +124,4 @@ void StressBoundaryCondition::fillSamplingIndices(std::vector<SPtr<Grid> > grid, this->velocitySamplingIndices.push_back( grid[level]->transCoordToIndex(x_sampling, y_sampling, z_sampling) ); } -} - +} \ No newline at end of file diff --git a/src/gpu/GridGenerator/grid/BoundaryConditions/BoundaryCondition.h b/src/gpu/GridGenerator/grid/BoundaryConditions/BoundaryCondition.h index 4a3990d9f815042297be76ae83a61268c8ad6815..f70aa0cf886019e6a97ca5c86a0cdafa1296b141 100644 --- a/src/gpu/GridGenerator/grid/BoundaryConditions/BoundaryCondition.h +++ b/src/gpu/GridGenerator/grid/BoundaryConditions/BoundaryCondition.h @@ -45,6 +45,8 @@ class Grid; class Side; enum class SideType; +class VelocityReader; + namespace gg 
{ class BoundaryCondition @@ -63,6 +65,8 @@ public: bool isSide(SideType side) const; real getQ(uint index, uint dir) { return this->qs[index][dir]; } + + void getCoords( SPtr<Grid> grid, std::vector<real>& x, std::vector<real>& y, std::vector<real>& z); }; } @@ -246,6 +250,7 @@ public: real getVy(uint index) { return this->vyList[index]; } real getVz(uint index) { return this->vzList[index]; } + void setVelocityProfile( SPtr<Grid> grid, std::function<void(real,real,real,real&,real&,real&)> velocityProfile ); }; @@ -329,5 +334,32 @@ public: real getNormalz(uint index) { return this->normalZList[index]; } }; +class PrecursorBoundaryCondition : public gg::BoundaryCondition +{ +public: + static SPtr<PrecursorBoundaryCondition> make(SPtr<VelocityReader> reader, int nTRead, real velocityX, real velocityY, real velocityZ) + { + return SPtr<PrecursorBoundaryCondition>(new PrecursorBoundaryCondition(reader, nTRead, velocityX, velocityY, velocityZ)); + } + SPtr<VelocityReader> getReader(){ return reader; } + real getVelocityX() { return velocityX; } + real getVelocityY() { return velocityY; } + real getVelocityZ() { return velocityZ; } + +private: + PrecursorBoundaryCondition(SPtr<VelocityReader> _reader, uint _nTRead, real vx, real vy, real vz) : reader(_reader), nTRead(_nTRead), velocityX(vx), velocityY(vy), velocityZ(vz) { }; + virtual char getType() const override + { + return vf::gpu::BC_VELOCITY; + } +public: + uint nTRead; + +private: + real velocityX = 0.0; + real velocityY = 0.0; + real velocityZ = 0.0; + SPtr<VelocityReader> reader; +}; #endif \ No newline at end of file diff --git a/src/gpu/GridGenerator/grid/BoundaryConditions/Side.cpp b/src/gpu/GridGenerator/grid/BoundaryConditions/Side.cpp index 6c7bf8ca1853826d83fb6a713ffe03716bd2cf9a..270aff9d8ff9639b8ae0c19451ca90990eba9c63 100644 --- a/src/gpu/GridGenerator/grid/BoundaryConditions/Side.cpp +++ b/src/gpu/GridGenerator/grid/BoundaryConditions/Side.cpp @@ -40,6 +40,18 @@ using namespace gg; +std::vector<real> 
Side::getNormal() +{ + std::vector<real> normal; + if(this->getCoordinate()==X_INDEX) + normal = {(real)this->getDirection(), 0.0, 0.0}; + if(this->getCoordinate()==Y_INDEX) + normal = {0.0, (real)this->getDirection(), 0.0}; + if(this->getCoordinate()==Z_INDEX) + normal = {0.0, 0.0, (real)this->getDirection()}; + return normal; +} + void Side::addIndices(SPtr<Grid> grid, SPtr<BoundaryCondition> boundaryCondition, std::string coord, real constant, real startInner, real endInner, real startOuter, real endOuter) { @@ -49,11 +61,20 @@ void Side::addIndices(SPtr<Grid> grid, SPtr<BoundaryCondition> boundaryCondition { const uint index = getIndex(grid, coord, constant, v1, v2); - if ((index != INVALID_INDEX) && ( grid->getFieldEntry(index) == vf::gpu::FLUID - || grid->getFieldEntry(index) == vf::gpu::FLUID_CFC - || grid->getFieldEntry(index) == vf::gpu::FLUID_CFF - || grid->getFieldEntry(index) == vf::gpu::FLUID_FCC - || grid->getFieldEntry(index) == vf::gpu::FLUID_FCF )) + if ((index != INVALID_INDEX) && ( grid->getFieldEntry(index) == vf::gpu::FLUID + || grid->getFieldEntry(index) == vf::gpu::FLUID_CFC + || grid->getFieldEntry(index) == vf::gpu::FLUID_CFF + || grid->getFieldEntry(index) == vf::gpu::FLUID_FCC + || grid->getFieldEntry(index) == vf::gpu::FLUID_FCF + || grid->getFieldEntry(index) == vf::gpu::FLUID_FCF + + //! 
Enforce overlap of BCs on edge nodes + || grid->getFieldEntry(index) == vf::gpu::BC_PRESSURE + || grid->getFieldEntry(index) == vf::gpu::BC_VELOCITY + || grid->getFieldEntry(index) == vf::gpu::BC_NOSLIP + || grid->getFieldEntry(index) == vf::gpu::BC_SLIP + || grid->getFieldEntry(index) == vf::gpu::BC_STRESS ) + /*&& boundaryCondition->getType()!=vf::gpu::BC_STRESS*/ ) { grid->setFieldEntry(index, boundaryCondition->getType()); boundaryCondition->indices.push_back(index); @@ -64,6 +85,24 @@ void Side::addIndices(SPtr<Grid> grid, SPtr<BoundaryCondition> boundaryCondition boundaryCondition->patches.push_back(0); } + // else if(boundaryCondition->getType()==vf::gpu::BC_STRESS && (index != INVALID_INDEX) && ( grid->getFieldEntry(index) == vf::gpu::FLUID + // || grid->getFieldEntry(index) == vf::gpu::FLUID_CFC + // || grid->getFieldEntry(index) == vf::gpu::FLUID_CFF + // || grid->getFieldEntry(index) == vf::gpu::FLUID_FCC + // || grid->getFieldEntry(index) == vf::gpu::FLUID_FCF + // || grid->getFieldEntry(index) == vf::gpu::FLUID_FCF + // || grid->getFieldEntry(index) == vf::gpu::BC_PRESSURE + // )) + // { + // grid->setFieldEntry(index, boundaryCondition->getType()); + // boundaryCondition->indices.push_back(index); + // setPressureNeighborIndices(boundaryCondition, grid, index); + // setStressSamplingIndices(boundaryCondition, grid, index); + + // setQs(grid, boundaryCondition, index); + + // boundaryCondition->patches.push_back(0); + // } } } @@ -152,16 +191,28 @@ void Side::setQs(SPtr<Grid> grid, SPtr<BoundaryCondition> boundaryCondition, uin else neighborZ = grid->getLastFluidNode ( coords, 2, grid->getEndZ() ); } + //! 
Only setting q's that partially point in the Side-normal direction + bool alignedWithNormal = (this->getNormal()[0]*grid->getDirection()[dir * DIMENSION + 0]+ + this->getNormal()[1]*grid->getDirection()[dir * DIMENSION + 1]+ + this->getNormal()[2]*grid->getDirection()[dir * DIMENSION + 2] ) > 0; + + // if(boundaryCondition->getType()==vf::gpu::BC_VELOCITY && z < 8.0 ) + // { + // alignedWithNormal = true; + // printf("XYZ: %f \t %f \t %f \n", x,y,z); + // printf("dir: %d \t %d \t %d \n\n", grid->getDirection()[dir * DIMENSION + 0], grid->getDirection()[dir * DIMENSION + 1], grid->getDirection()[dir * DIMENSION + 2]); + // } + uint neighborIndex = grid->transCoordToIndex( neighborX, neighborY, neighborZ ); - if( grid->getFieldEntry(neighborIndex) == vf::gpu::STOPPER_OUT_OF_GRID_BOUNDARY || - grid->getFieldEntry(neighborIndex) == vf::gpu::STOPPER_OUT_OF_GRID || - grid->getFieldEntry(neighborIndex) == vf::gpu::STOPPER_SOLID ) + if((grid->getFieldEntry(neighborIndex) == vf::gpu::STOPPER_OUT_OF_GRID_BOUNDARY || + grid->getFieldEntry(neighborIndex) == vf::gpu::STOPPER_OUT_OF_GRID || + grid->getFieldEntry(neighborIndex) == vf::gpu::STOPPER_SOLID) && + alignedWithNormal ) qNode[dir] = 0.5; else qNode[dir] = -1.0; - } - + boundaryCondition->qs.push_back(qNode); } @@ -260,7 +311,7 @@ void MY::addIndices(std::vector<SPtr<Grid> > grid, uint level, SPtr<BoundaryCond real coordinateNormal = grid[level]->getStartY() + grid[level]->getDelta(); if( coordinateNormal > grid[0]->getStartY() + grid[0]->getDelta() ) return; - + Side::addIndices(grid[level], boundaryCondition, "y", coordinateNormal, startInner, endInner, startOuter, endOuter); } diff --git a/src/gpu/GridGenerator/grid/BoundaryConditions/Side.h b/src/gpu/GridGenerator/grid/BoundaryConditions/Side.h index 6df6bfccc9a39b80de3ac43d057a03945d035b34..53a763bc562ee978042b28d24856fbcca256c5f9 100644 --- a/src/gpu/GridGenerator/grid/BoundaryConditions/Side.h +++ b/src/gpu/GridGenerator/grid/BoundaryConditions/Side.h @@ -72,15 
+72,17 @@ public: virtual SideType whoAmI() const = 0; + std::vector<real> getNormal(); + protected: - static void addIndices(SPtr<Grid> grid, SPtr<gg::BoundaryCondition> boundaryCondition, std::string coord, real constant, + void addIndices(SPtr<Grid> grid, SPtr<gg::BoundaryCondition> boundaryCondition, std::string coord, real constant, real startInner, real endInner, real startOuter, real endOuter); static void setPressureNeighborIndices(SPtr<gg::BoundaryCondition> boundaryCondition, SPtr<Grid> grid, const uint index); static void setStressSamplingIndices(SPtr<gg::BoundaryCondition> boundaryCondition, SPtr<Grid> grid, const uint index); - static void setQs(SPtr<Grid> grid, SPtr<gg::BoundaryCondition> boundaryCondition, uint index); + void setQs(SPtr<Grid> grid, SPtr<gg::BoundaryCondition> boundaryCondition, uint index); private: static uint getIndex(SPtr<Grid> grid, std::string coord, real constant, real v1, real v2); diff --git a/src/gpu/GridGenerator/grid/GridBuilder/GridBuilder.h b/src/gpu/GridGenerator/grid/GridBuilder/GridBuilder.h index 739aef59f76a33fa67d472a77ef258469f5e411c..8e86e8bae6c4801839486767e2c85ab100b8588d 100644 --- a/src/gpu/GridGenerator/grid/GridBuilder/GridBuilder.h +++ b/src/gpu/GridGenerator/grid/GridBuilder/GridBuilder.h @@ -54,6 +54,7 @@ class GridWrapper; class Transformator; class ArrowTransformator; class PolyDataWriterWrapper; +class VelocityReader; class BoundingBox; class Grid; @@ -113,6 +114,15 @@ public: virtual void getPressureValues(real *rho, int *indices, int *neighborIndices, int level) const = 0; virtual void getPressureQs(real *qs[27], int level) const = 0; + virtual uint getPrecursorSize(int level) const = 0; + virtual void getPrecursorValues(uint* neighborNT, uint* neighborNB, uint* neighborST, uint* neighborSB, + real* weightsNT, real* weightsNB, real* weightsST, real* weightsSB, + int* indices, std::vector<SPtr<VelocityReader>>& reader, + int& numberOfPrecursorNodes, size_t& numberOfQuantities, uint& nTRead, + real& 
velocityX, real& velocityY, real& velocityZ, int level) const = 0; + + virtual void getPrecursorQs(real* qs[27], int level) const = 0; + virtual uint getGeometrySize(int level) const = 0; virtual void getGeometryIndices(int *indices, int level) const = 0; virtual void getGeometryQs(real *qs[27], int level) const = 0; diff --git a/src/gpu/GridGenerator/grid/GridBuilder/LevelGridBuilder.cpp b/src/gpu/GridGenerator/grid/GridBuilder/LevelGridBuilder.cpp index 083b9a51e0b151f49922df456e968c4b204e4af7..0fd97f95817a73992347d33279978afdef87b0fc 100644 --- a/src/gpu/GridGenerator/grid/GridBuilder/LevelGridBuilder.cpp +++ b/src/gpu/GridGenerator/grid/GridBuilder/LevelGridBuilder.cpp @@ -52,6 +52,8 @@ #include "io/QLineWriter.h" #include "io/SimulationFileWriter/SimulationFileWriter.h" +#include "VelocitySetter/VelocitySetter.h" + #include "utilities/communication.h" #include "utilities/transformator/ArrowTransformator.h" @@ -243,6 +245,24 @@ void LevelGridBuilder::setNoSlipGeometryBoundaryCondition() } } +void LevelGridBuilder::setPrecursorBoundaryCondition(SideType sideType, SPtr<VelocityFileCollection> fileCollection, int nTRead, real velocityX, real velocityY, real velocityZ) +{ + for (uint level = 0; level < getNumberOfGridLevels(); level++) + { + auto reader = createReaderForCollection(fileCollection); + SPtr<PrecursorBoundaryCondition> precursorBoundaryCondition = PrecursorBoundaryCondition::make( reader, nTRead, velocityX, velocityY, velocityZ); + + auto side = SideFactory::make(sideType); + + precursorBoundaryCondition->side = side; + precursorBoundaryCondition->side->addIndices(grids, level, precursorBoundaryCondition); + + boundaryConditions[level]->precursorBoundaryConditions.push_back(precursorBoundaryCondition); + + *logging::out << logging::Logger::INFO_INTERMEDIATE << "Set Precursor BC on level " << level << " with " << (int)precursorBoundaryCondition->indices.size() << "\n"; + } +} + GRIDGENERATOR_EXPORT void 
LevelGridBuilder::setEnableFixRefinementIntoTheWall(bool enableFixRefinementIntoTheWall) { for (uint level = 0; level < this->grids.size(); level++) @@ -594,6 +614,87 @@ void LevelGridBuilder::getPressureQs(real* qs[27], int level) const } } +uint LevelGridBuilder::getPrecursorSize(int level) const +{ + uint size = 0; + for (auto boundaryCondition : boundaryConditions[level]->precursorBoundaryConditions) + { + size += uint(boundaryCondition->indices.size()); + } + return size; +} + +void LevelGridBuilder::getPrecursorValues( uint* neighborNT, uint* neighborNB, uint* neighborST, uint* neighborSB, + real* weightsNT, real* weightsNB, real* weightsST, real* weightsSB, + int* indices, std::vector<SPtr<VelocityReader>>& reader, + int& numberOfPrecursorNodes, size_t& numberOfQuantities, uint& nTRead, + real& velocityX, real& velocityY, real& velocityZ, int level) const +{ + int allIndicesCounter = 0; + int allNodesCounter = 0; + uint tmpNTRead = 0; + size_t tmpNQuantities = 0; + + for (auto boundaryCondition : boundaryConditions[level]->precursorBoundaryConditions) + { + if( tmpNTRead == 0 ) + tmpNTRead = boundaryCondition->nTRead; + if( tmpNTRead != boundaryCondition->nTRead ) + throw std::runtime_error("All precursor boundary conditions must have the same NTRead value"); + + auto BCreader = boundaryCondition->getReader(); + BCreader->setWritingOffset(allIndicesCounter); + reader.push_back(BCreader); + + std::vector<real> y, z; + real xTmp, yTmp, zTmp; + for(uint i = 0; i<boundaryCondition->indices.size(); i++) + { + indices[allIndicesCounter] = grids[level]->getSparseIndex(boundaryCondition->indices[i]) + 1; + grids[level]->transIndexToCoords(boundaryCondition->indices[i], xTmp, yTmp, zTmp); + y.push_back(yTmp); + z.push_back(zTmp); + allIndicesCounter++; + } + BCreader->fillArrays(y, z); + BCreader->getNeighbors(neighborNT, neighborNB, neighborST, neighborSB); + BCreader->getWeights(weightsNT, weightsNB, weightsST, weightsSB); + if(tmpNQuantities == 0) + tmpNQuantities 
= BCreader->getNumberOfQuantities(); + if(tmpNQuantities != BCreader->getNumberOfQuantities()) + throw std::runtime_error("All precursor files must have the same quantities."); + allNodesCounter += BCreader->getNPointsRead(); + velocityX = boundaryCondition->getVelocityX(); + velocityY = boundaryCondition->getVelocityY(); + velocityZ = boundaryCondition->getVelocityZ(); + } + numberOfPrecursorNodes = allNodesCounter; + + if (tmpNTRead == 0) + throw std::runtime_error("NTRead of precursor needs to be larger than 0."); + nTRead = tmpNTRead; + + if (tmpNQuantities == 0) + throw std::runtime_error("Number of quantities in precursor needs to be larger than 0."); + numberOfQuantities = tmpNQuantities; +} + +void LevelGridBuilder::getPrecursorQs(real* qs[27], int level) const +{ + int allIndicesCounter = 0; + for (auto boundaryCondition : boundaryConditions[level]->precursorBoundaryConditions) + { + for ( uint index = 0; index < boundaryCondition->indices.size(); index++ ) + { + for (int dir = 0; dir <= grids[level]->getEndDirection(); dir++) + { + qs[dir][allIndicesCounter] = boundaryCondition->qs[index][dir]; + } + allIndicesCounter++; + } + } +} + uint LevelGridBuilder::getGeometrySize(int level) const { if (boundaryConditions[level]->geometryBoundaryCondition) diff --git a/src/gpu/GridGenerator/grid/GridBuilder/LevelGridBuilder.h b/src/gpu/GridGenerator/grid/GridBuilder/LevelGridBuilder.h index 4d73b8b0464b9823f7fad0ac011450a23b4054f1..7240f466b077bf612361dd3b0465faf6ec97420b 100644 --- a/src/gpu/GridGenerator/grid/GridBuilder/LevelGridBuilder.h +++ b/src/gpu/GridGenerator/grid/GridBuilder/LevelGridBuilder.h @@ -58,8 +58,12 @@ class SlipBoundaryCondition; class StressBoundaryCondition; class PressureBoundaryCondition; class GeometryBoundaryCondition; +class PrecursorBoundaryCondition; enum class SideType; +class VelocityReader; +class VelocityFileCollection; + class LevelGridBuilder : public GridBuilder @@ -80,6 +84,7 @@ public: GRIDGENERATOR_EXPORT void 
setPressureBoundaryCondition(SideType sideType, real rho); GRIDGENERATOR_EXPORT void setPeriodicBoundaryCondition(bool periodic_X, bool periodic_Y, bool periodic_Z); GRIDGENERATOR_EXPORT void setNoSlipBoundaryCondition(SideType sideType); + GRIDGENERATOR_EXPORT void setPrecursorBoundaryCondition(SideType sideType, SPtr<VelocityFileCollection> fileCollection, int nTRead, real velocityX=0.0f, real velocityY=0.0f, real velocityZ=0.0f); GRIDGENERATOR_EXPORT void setEnableFixRefinementIntoTheWall(bool enableFixRefinementIntoTheWall); @@ -121,6 +126,14 @@ public: GRIDGENERATOR_EXPORT void getPressureValues(real* rho, int* indices, int* neighborIndices, int level) const override; GRIDGENERATOR_EXPORT virtual void getPressureQs(real* qs[27], int level) const override; + GRIDGENERATOR_EXPORT uint getPrecursorSize(int level) const override; + GRIDGENERATOR_EXPORT void getPrecursorValues( uint* neighborNT, uint* neighborNB, uint* neighborST, uint* neighborSB, + real* weightsNT, real* weightsNB, real* weightsST, real* weightsSB, + int* indices, std::vector<SPtr<VelocityReader>>& reader, + int& numberOfPrecursorNodes, size_t& numberOfQuantities, uint& nTRead, + real& velocityX, real& velocityY, real& velocityZ, int level) const override; + GRIDGENERATOR_EXPORT virtual void getPrecursorQs(real* qs[27], int level) const override; + GRIDGENERATOR_EXPORT virtual void getGeometryQs(real *qs[27], int level) const override; GRIDGENERATOR_EXPORT virtual uint getGeometrySize(int level) const override; GRIDGENERATOR_EXPORT virtual void getGeometryIndices(int *indices, int level) const override; @@ -149,6 +162,8 @@ protected: std::vector<SPtr<VelocityBoundaryCondition>> noSlipBoundaryConditions; + std::vector<SPtr<PrecursorBoundaryCondition>> precursorBoundaryConditions; + SPtr<GeometryBoundaryCondition> geometryBoundaryCondition; }; bool geometryHasValues = false; diff --git a/src/gpu/VirtualFluids_GPU/CMakeLists.txt b/src/gpu/VirtualFluids_GPU/CMakeLists.txt index 
759528e5346ba8d9899cb90eb64503b20a44c4fc..fd84df4a667f6506d95a1afb3ff15cae8fdb0d15 100644 --- a/src/gpu/VirtualFluids_GPU/CMakeLists.txt +++ b/src/gpu/VirtualFluids_GPU/CMakeLists.txt @@ -24,5 +24,8 @@ if(BUILD_VF_UNIT_TESTS) set_source_files_properties(DataStructureInitializer/GridReaderGenerator/IndexRearrangementForStreamsTest.cpp PROPERTIES LANGUAGE CUDA) set_source_files_properties(Communication/ExchangeData27Test.cpp PROPERTIES LANGUAGE CUDA) set_source_files_properties(BoundaryConditions/BoundaryConditionFactoryTest.cpp PROPERTIES LANGUAGE CUDA) + set_source_files_properties(KernelManager/BCKernelManagerTest.cpp PROPERTIES LANGUAGE CUDA) + set_source_files_properties(Parameter/ParameterTest.cpp PROPERTIES LANGUAGE CUDA) + set_source_files_properties(Parameter/EdgeNodeFinderTest.cpp PROPERTIES LANGUAGE CUDA) target_include_directories(VirtualFluids_GPUTests PRIVATE "${VF_THIRD_DIR}/cuda_samples/") endif() diff --git a/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.cpp b/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.cpp index 296ab819c5538a6b6d6a6827b5c28cbc475af838..b7e8f595c678e571be8894a611d5f7386ca54bd0 100644 --- a/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.cpp +++ b/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.cpp @@ -28,7 +28,7 @@ void UpdateGrid27::updateGrid(int level, unsigned int t) ////////////////////////////////////////////////////////////////////////// - postCollisionBC(level); + postCollisionBC(level, t); ////////////////////////////////////////////////////////////////////////// @@ -227,9 +227,10 @@ void UpdateGrid27::exchangeMultiGPUAfterFtoC(int level, int streamIndex) } } -void UpdateGrid27::postCollisionBC(int level) +void UpdateGrid27::postCollisionBC(int level, uint t) { ////////////////////////////////////////////////////////////////////////// + // G E O M E T R Y // V E L O C I T Y (I N F L O W) this->bcKernelManager->runVelocityBCKernelPost(level); @@ -257,6 +258,10 @@ void UpdateGrid27::postCollisionBC(int 
level) // P R E S S U R E this->bcKernelManager->runPressureBCKernelPost(level); + ////////////////////////////////////////////////////////////////////////// + // P R E C U R S O R + this->bcKernelManager->runPrecursorBCKernelPost(level, t, cudaMemoryManager.get()); + ////////////////////////////////////////////////////////////////////////// // A D V E C T I O N D I F F U S I O N if (para->getDiffOn()) diff --git a/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.h b/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.h index 8110923bf066412e2bb09ffa1f10efe3ddc983c7..bb26a6237aadb71514e5e100dc04318100be1f7b 100644 --- a/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.h +++ b/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.h @@ -34,7 +34,7 @@ private: void collisionUsingIndices(int level, unsigned int t, uint *fluidNodeIndices = nullptr, uint numberOfFluidNodes = 0, int stream = -1); void collisionAdvectionDiffusion(int level); - void postCollisionBC(int level); + void postCollisionBC(int level, unsigned int t); void preCollisionBC(int level, unsigned int t); void collisionPorousMedia(int level); diff --git a/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderGenerator/GridGenerator.cpp b/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderGenerator/GridGenerator.cpp index aecd306373eff65a80c708c9c8783c155e1ebe48..3edeb199ef02cd2b633d25c9c0b3a1b9f94d3187 100644 --- a/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderGenerator/GridGenerator.cpp +++ b/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderGenerator/GridGenerator.cpp @@ -10,6 +10,7 @@ #include <algorithm> #include "utilities/math/Math.h" #include "Output/QDebugWriter.hpp" +#include "GridGenerator/VelocitySetter/VelocitySetter.h" #include "utilities/communication.h" #include "Communication/Communicator.h" @@ -123,6 +124,7 @@ void GridGenerator::allocArrays_BoundaryValues() 
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// para->getParH(level)->pressureBC.numberOfBCnodes = 0; + para->getParD(level)->outflowPressureCorrectionFactor = para->getOutflowPressureCorrectionFactor(); if (numberOfPressureValues > 1) { blocks = (numberOfPressureValues / para->getParH(level)->numberofthreads) + 1; @@ -236,6 +238,99 @@ void GridGenerator::allocArrays_BoundaryValues() para->getParD(level)->numberOfVeloBCnodesRead = para->getParH(level)->velocityBC.numberOfBCnodes * para->getD3Qxx(); } + for (uint level = 0; level < builder->getNumberOfGridLevels(); level++) { + const auto numberOfPrecursorValues = int(builder->getPrecursorSize(level)); + std::cout << "size precursor level " << level << " : " << numberOfPrecursorValues << std::endl; + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + int blocks = (numberOfPrecursorValues / para->getParH(level)->numberofthreads) + 1; + para->getParH(level)->precursorBC.sizeQ = blocks * para->getParH(level)->numberofthreads; + para->getParD(level)->precursorBC.sizeQ = para->getParH(level)->precursorBC.sizeQ; + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + para->getParH(level)->precursorBC.numberOfBCnodes = numberOfPrecursorValues; + para->getParD(level)->precursorBC.numberOfBCnodes = numberOfPrecursorValues; + para->getParH(level)->numberOfPrecursorBCnodesRead = numberOfPrecursorValues * para->getD3Qxx(); + para->getParD(level)->numberOfPrecursorBCnodesRead = numberOfPrecursorValues * para->getD3Qxx(); + + if (numberOfPrecursorValues > 1) + { + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + 
cudaMemoryManager->cudaAllocPrecursorBC(level); + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + + builder->getPrecursorValues( + para->getParH(level)->precursorBC.planeNeighborNT, para->getParH(level)->precursorBC.planeNeighborNB, + para->getParH(level)->precursorBC.planeNeighborST, para->getParH(level)->precursorBC.planeNeighborSB, + para->getParH(level)->precursorBC.weightsNT, para->getParH(level)->precursorBC.weightsNB, + para->getParH(level)->precursorBC.weightsST, para->getParH(level)->precursorBC.weightsSB, + para->getParH(level)->precursorBC.k, para->getParH(level)->velocityReader, para->getParH(level)->precursorBC.numberOfPrecursorNodes, + para->getParH(level)->precursorBC.numberOfQuantities, para->getParH(level)->precursorBC.nTRead, + para->getParH(level)->precursorBC.velocityX, para->getParH(level)->precursorBC.velocityY, para->getParH(level)->precursorBC.velocityZ, + level); + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + para->getParD(level)->precursorBC.numberOfPrecursorNodes = para->getParH(level)->precursorBC.numberOfPrecursorNodes; + para->getParD(level)->precursorBC.numberOfQuantities = para->getParH(level)->precursorBC.numberOfQuantities; + para->getParD(level)->precursorBC.nTRead = para->getParH(level)->precursorBC.nTRead; + para->getParD(level)->precursorBC.velocityX = para->getParH(level)->precursorBC.velocityX; + para->getParD(level)->precursorBC.velocityY = para->getParH(level)->precursorBC.velocityY; + para->getParD(level)->precursorBC.velocityZ = para->getParH(level)->precursorBC.velocityZ; + + for(auto reader : para->getParH(level)->velocityReader) + { + if(reader->getNumberOfQuantities() != para->getParD(level)->precursorBC.numberOfQuantities) throw std::runtime_error("Number of quantities in reader and number of quantities needed 
for precursor don't match!"); + } + + cudaMemoryManager->cudaCopyPrecursorBC(level); + cudaMemoryManager->cudaAllocPrecursorData(level); + + // read first timestep of precursor into next and copy to next on device + for(auto reader : para->getParH(level)->velocityReader) + { + reader->getNextData(para->getParH(level)->precursorBC.next, para->getParH(level)->precursorBC.numberOfPrecursorNodes, 0); + } + + cudaMemoryManager->cudaCopyPrecursorData(level); + + //switch next with last pointers + real* tmp = para->getParD(level)->precursorBC.last; + para->getParD(level)->precursorBC.last = para->getParD(level)->precursorBC.next; + para->getParD(level)->precursorBC.next = tmp; + + //read second timestep of precursor into next and copy next to device + real nextTime = para->getParD(level)->precursorBC.nTRead*pow(2,-((real)level))*para->getTimeRatio(); + for(auto reader : para->getParH(level)->velocityReader) + { + reader->getNextData(para->getParH(level)->precursorBC.next, para->getParH(level)->precursorBC.numberOfPrecursorNodes, nextTime); + } + + cudaMemoryManager->cudaCopyPrecursorData(level); + + para->getParD(level)->precursorBC.nPrecursorReads = 1; + + + //switch next with current pointers + tmp = para->getParD(level)->precursorBC.current; + para->getParD(level)->precursorBC.current = para->getParD(level)->precursorBC.next; + para->getParD(level)->precursorBC.next = tmp; + + //start usual cycle of loading, i.e. 
read velocities of timestep after current and copy asynchronously to device + for(auto reader : para->getParH(level)->velocityReader) + { + reader->getNextData(para->getParH(level)->precursorBC.next, para->getParH(level)->precursorBC.numberOfPrecursorNodes, 2*nextTime); + } + + cudaMemoryManager->cudaCopyPrecursorData(level); + + para->getParD(level)->precursorBC.nPrecursorReads = 2; + } + + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + // advection - diffusion stuff + if (para->getDiffOn()==true){ + throw std::runtime_error(" Advection Diffusion not implemented for Precursor!"); + } + ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + } + if (builder->hasGeometryValues()) { @@ -868,6 +963,50 @@ void GridGenerator::allocArrays_BoundaryQs() } } + for (uint i = 0; i < builder->getNumberOfGridLevels(); i++) { + const auto numberOfPrecursorNodes = int(builder->getPrecursorSize(i)); + if (numberOfPrecursorNodes > 0) + { + std::cout << "size velocity level " << i << " : " << numberOfPrecursorNodes << std::endl; + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + //preprocessing + real* QQ = para->getParH(i)->precursorBC.q27[0]; + unsigned int sizeQ = para->getParH(i)->precursorBC.numberOfBCnodes; + QforBoundaryConditions Q; + getPointersToBoundaryConditions(Q, QQ, sizeQ); + + builder->getPrecursorQs(Q.q27, i); + + if (para->getDiffOn()) { + throw std::runtime_error("Advection diffusion not implemented for Precursor!"); + ////////////////////////////////////////////////////////////////////////// + // para->getParH(i)->TempVel.kTemp = numberOfVelocityNodes; + // para->getParD(i)->TempVel.kTemp = numberOfVelocityNodes; + // std::cout << "Groesse TempVel.kTemp = " << 
para->getParH(i)->TempPress.kTemp << std::endl; + // std::cout << "getTemperatureInit = " << para->getTemperatureInit() << std::endl; + // std::cout << "getTemperatureBC = " << para->getTemperatureBC() << std::endl; + // ////////////////////////////////////////////////////////////////////////// + // cudaMemoryManager->cudaAllocTempVeloBC(i); + // //cout << "nach alloc " << std::endl; + // ////////////////////////////////////////////////////////////////////////// + // for (int m = 0; m < numberOfVelocityNodes; m++) + // { + // para->getParH(i)->TempVel.temp[m] = para->getTemperatureInit(); + // para->getParH(i)->TempVel.tempPulse[m] = para->getTemperatureBC(); + // para->getParH(i)->TempVel.velo[m] = para->getVelocity(); + // para->getParH(i)->TempVel.k[m] = para->getParH(i)->Qinflow.k[m]; + // } + // ////////////////////////////////////////////////////////////////////////// + // //cout << "vor copy " << std::endl; + // cudaMemoryManager->cudaCopyTempVeloBCHD(i); + // //cout << "nach copy " << std::endl; + ////////////////////////////////////////////////////////////////////////// + } + cudaMemoryManager->cudaCopyPrecursorBC(i); + } + } + + for (uint i = 0; i < builder->getNumberOfGridLevels(); i++) { const int numberOfGeometryNodes = builder->getGeometrySize(i); diff --git a/src/gpu/VirtualFluids_GPU/Factories/BoundaryConditionFactory.cpp b/src/gpu/VirtualFluids_GPU/Factories/BoundaryConditionFactory.cpp index bff054eb174a0f5fa34119deedde6f1c9733d83c..b1c398638cff1ec1b6d52f59f8e773183e270331 100644 --- a/src/gpu/VirtualFluids_GPU/Factories/BoundaryConditionFactory.cpp +++ b/src/gpu/VirtualFluids_GPU/Factories/BoundaryConditionFactory.cpp @@ -35,6 +35,11 @@ void BoundaryConditionFactory::setStressBoundaryCondition(const StressBC boundar this->stressBoundaryCondition = boundaryConditionType; } +void BoundaryConditionFactory::setPrecursorBoundaryCondition(const PrecursorBC boundaryConditionType) +{ + this->precursorBoundaryCondition = boundaryConditionType; +} + 
boundaryCondition BoundaryConditionFactory::getVelocityBoundaryConditionPost(bool isGeometryBC) const { const VelocityBC &boundaryCondition = @@ -132,6 +137,22 @@ boundaryCondition BoundaryConditionFactory::getPressureBoundaryConditionPre() co case PressureBC::OutflowNonReflective: return QPressNoRhoDev27; break; + case PressureBC::OutflowNonReflectivePressureCorrection: + return QPressZeroRhoOutflowDev27; default: return nullptr; } +} + +precursorBoundaryConditionFunc BoundaryConditionFactory::getPrecursorBoundaryConditionPost() const +{ + switch (this->precursorBoundaryCondition) { + case PrecursorBC::VelocityPrecursor: + return QPrecursorDevCompZeroPress; + break; + case PrecursorBC::DistributionsPrecursor: + return PrecursorDevDistributions; + break; default: return nullptr; } diff --git a/src/gpu/VirtualFluids_GPU/Factories/BoundaryConditionFactory.h b/src/gpu/VirtualFluids_GPU/Factories/BoundaryConditionFactory.h index 9d6872c4847be72dff4be7137b774c8082e39e34..fcd309c1690d6d326ea6796a016514aba263527a 100644 --- a/src/gpu/VirtualFluids_GPU/Factories/BoundaryConditionFactory.h +++ b/src/gpu/VirtualFluids_GPU/Factories/BoundaryConditionFactory.h @@ -47,6 +47,7 @@ class Parameter; using boundaryCondition = std::function<void(LBMSimulationParameter *, QforBoundaryConditions *)>; using boundaryConditionWithParameter = std::function<void(Parameter *, QforBoundaryConditions *, const int level)>; +using precursorBoundaryConditionFunc = std::function<void(LBMSimulationParameter *, QforPrecursorBoundaryConditions *, real tRatio, real velocityRatio)>; class BoundaryConditionFactory { @@ -109,6 +110,8 @@ public: PressureNonEquilibriumCompressible, //! - OutflowNonReflective = outflow boundary condition, should be combined with VelocityAndPressureCompressible OutflowNonReflective, + //! - OutflowNonReflectivePressureCorrection = like OutflowNonReflective, but also reduces pressure overshoot + OutflowNonReflectivePressureCorrection, //! 
- NotSpecified = the user did not set a boundary condition NotSpecified }; @@ -128,11 +131,21 @@ public: // enum class OutflowBoundaryCondition {}; // TODO: // https://git.rz.tu-bs.de/m.schoenherr/VirtualFluids_dev/-/issues/16 + enum class PrecursorBC { + //! - VelocityPrecursor + VelocityPrecursor, + //! - DistributionsPrecursor + DistributionsPrecursor, + //! - NotSpecified = the user did not set a boundary condition + NotSpecified + }; + void setVelocityBoundaryCondition(const BoundaryConditionFactory::VelocityBC boundaryConditionType); void setNoSlipBoundaryCondition(const BoundaryConditionFactory::NoSlipBC boundaryConditionType); void setSlipBoundaryCondition(const BoundaryConditionFactory::SlipBC boundaryConditionType); void setPressureBoundaryCondition(const BoundaryConditionFactory::PressureBC boundaryConditionType); void setStressBoundaryCondition(const BoundaryConditionFactory::StressBC boundaryConditionType); + void setPrecursorBoundaryCondition(const BoundaryConditionFactory::PrecursorBC boundaryConditionType); //! \brief set a boundary condition for the geometry //! param boundaryConditionType: a velocity, no-slip or slip boundary condition //! 
\details suggestions for boundaryConditionType: @@ -152,6 +165,8 @@ public: [[nodiscard]] boundaryCondition getSlipBoundaryConditionPost(bool isGeometryBC = false) const; [[nodiscard]] boundaryCondition getPressureBoundaryConditionPre() const; [[nodiscard]] boundaryCondition getGeometryBoundaryConditionPost() const; + [[nodiscard]] precursorBoundaryConditionFunc getPrecursorBoundaryConditionPost() const; + [[nodiscard]] boundaryConditionWithParameter getStressBoundaryConditionPost() const; @@ -162,6 +177,7 @@ private: PressureBC pressureBoundaryCondition = PressureBC::NotSpecified; std::variant<VelocityBC, NoSlipBC, SlipBC> geometryBoundaryCondition = NoSlipBC::NoSlipImplicitBounceBack; StressBC stressBoundaryCondition = StressBC::NotSpecified; + PrecursorBC precursorBoundaryCondition = PrecursorBC::NotSpecified; // OutflowBoundaryConditon outflowBC // TODO: https://git.rz.tu-bs.de/m.schoenherr/VirtualFluids_dev/-/issues/16 }; diff --git a/src/gpu/VirtualFluids_GPU/GPU/CudaMemoryManager.cpp b/src/gpu/VirtualFluids_GPU/GPU/CudaMemoryManager.cpp index ea385fd7e39c2c2b2e9bddb462229e163b541797..b84f7c7403c462fb74d7d9eccf94cd82b29818fe 100644 --- a/src/gpu/VirtualFluids_GPU/GPU/CudaMemoryManager.cpp +++ b/src/gpu/VirtualFluids_GPU/GPU/CudaMemoryManager.cpp @@ -5,15 +5,18 @@ #include <math.h> #include <Parameter/Parameter.h> + #include "Parameter/CudaStreamManager.h" #include "PreCollisionInteractor/ActuatorLine.h" #include "PreCollisionInteractor/ActuatorFarm.h" #include "PreCollisionInteractor/Probes/Probe.h" +#include <PreCollisionInteractor/PrecursorWriter.h> #include "Calculation/PorousMedia.h" #include "lbm/constants/NumericConstants.h" + void CudaMemoryManager::cudaAllocFull(int lev) { checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->geo ), parameter->getParH(lev)->mem_size_int )); @@ -240,6 +243,7 @@ void CudaMemoryManager::cudaCopyVeloBC(int lev) checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->velocityBC.deltaVz, 
parameter->getParH(lev)->velocityBC.deltaVz, mem_size_inflow_Q_q, cudaMemcpyHostToDevice)); } + void CudaMemoryManager::cudaFreeVeloBC(int lev) { checkCudaErrors( cudaFreeHost(parameter->getParH(lev)->velocityBC.q27[0] )); @@ -1656,6 +1660,133 @@ void CudaMemoryManager::cudaFreeWallModel(int lev, bool hasWallModelMonitor) } } + +//Precursor BC +void CudaMemoryManager::cudaAllocPrecursorBC(int lev) +{ + uint memSizeQInt = parameter->getParH(lev)->precursorBC.numberOfBCnodes*sizeof(int); + uint memSizeQUint = parameter->getParH(lev)->precursorBC.numberOfBCnodes*sizeof(uint); + uint memSizeQReal = parameter->getParH(lev)->precursorBC.numberOfBCnodes*sizeof(real); + + checkCudaErrors( cudaMallocHost((void**) &parameter->getParH(lev)->precursorBC.k, memSizeQInt)); + checkCudaErrors( cudaMallocHost((void**) &parameter->getParH(lev)->precursorBC.q27[0], parameter->getD3Qxx()*memSizeQReal)); + + + checkCudaErrors( cudaMallocHost((void**) &parameter->getParH(lev)->precursorBC.planeNeighborNT, memSizeQUint)); + checkCudaErrors( cudaMallocHost((void**) &parameter->getParH(lev)->precursorBC.planeNeighborNB, memSizeQUint)); + checkCudaErrors( cudaMallocHost((void**) &parameter->getParH(lev)->precursorBC.planeNeighborST, memSizeQUint)); + checkCudaErrors( cudaMallocHost((void**) &parameter->getParH(lev)->precursorBC.planeNeighborSB, memSizeQUint)); + + checkCudaErrors( cudaMallocHost((void**) &parameter->getParH(lev)->precursorBC.weightsNT, memSizeQReal)); + checkCudaErrors( cudaMallocHost((void**) &parameter->getParH(lev)->precursorBC.weightsNB, memSizeQReal)); + checkCudaErrors( cudaMallocHost((void**) &parameter->getParH(lev)->precursorBC.weightsST, memSizeQReal)); + checkCudaErrors( cudaMallocHost((void**) &parameter->getParH(lev)->precursorBC.weightsSB, memSizeQReal)); + + checkCudaErrors( cudaMalloc((void**) &parameter->getParD(lev)->precursorBC.k, memSizeQInt)); + checkCudaErrors( cudaMalloc((void**) &parameter->getParD(lev)->precursorBC.q27[0], parameter->getD3Qxx()*memSizeQReal)); + + checkCudaErrors( 
cudaMalloc((void**) &parameter->getParD(lev)->precursorBC.planeNeighborNT, memSizeQUint)); + checkCudaErrors( cudaMalloc((void**) &parameter->getParD(lev)->precursorBC.planeNeighborNB, memSizeQUint)); + checkCudaErrors( cudaMalloc((void**) &parameter->getParD(lev)->precursorBC.planeNeighborST, memSizeQUint)); + checkCudaErrors( cudaMalloc((void**) &parameter->getParD(lev)->precursorBC.planeNeighborSB, memSizeQUint)); + + checkCudaErrors( cudaMalloc((void**) &parameter->getParD(lev)->precursorBC.weightsNT, memSizeQReal)); + checkCudaErrors( cudaMalloc((void**) &parameter->getParD(lev)->precursorBC.weightsNB, memSizeQReal)); + checkCudaErrors( cudaMalloc((void**) &parameter->getParD(lev)->precursorBC.weightsST, memSizeQReal)); + checkCudaErrors( cudaMalloc((void**) &parameter->getParD(lev)->precursorBC.weightsSB, memSizeQReal)); + + real memSize = memSizeQInt+4*memSizeQUint+(4+parameter->getD3Qxx())*memSizeQReal; + setMemsizeGPU(memSize, false); + +} + + +void CudaMemoryManager::cudaAllocPrecursorData(int lev) +{ + size_t size = parameter->getParH(lev)->precursorBC.numberOfPrecursorNodes*sizeof(real)*parameter->getParH(lev)->precursorBC.numberOfQuantities; + + checkCudaErrors( cudaStreamCreate(&parameter->getParH(lev)->precursorBC.stream) ); + checkCudaErrors( cudaMallocHost((void**) &parameter->getParH(lev)->precursorBC.last, size)); + checkCudaErrors( cudaMallocHost((void**) &parameter->getParH(lev)->precursorBC.current, size)); + checkCudaErrors( cudaMallocHost((void**) &parameter->getParH(lev)->precursorBC.next, size)); + + checkCudaErrors( cudaMalloc((void**) &parameter->getParD(lev)->precursorBC.last, size)); + checkCudaErrors( cudaMalloc((void**) &parameter->getParD(lev)->precursorBC.current, size)); + checkCudaErrors( cudaMalloc((void**) &parameter->getParD(lev)->precursorBC.next, size)); + setMemsizeGPU(3*size, false); +} + + +void CudaMemoryManager::cudaCopyPrecursorBC(int lev) +{ + uint memSizeQInt = parameter->getParH(lev)->precursorBC.numberOfBCnodes*sizeof(int); + uint memSizeQUint = 
parameter->getParH(lev)->precursorBC.numberOfBCnodes*sizeof(uint); + uint memSizeQReal = parameter->getParH(lev)->precursorBC.numberOfBCnodes*sizeof(real); + + checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->precursorBC.k, parameter->getParH(lev)->precursorBC.k, memSizeQInt, cudaMemcpyHostToDevice)); + + checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->precursorBC.q27[0], parameter->getParH(lev)->precursorBC.q27[0], memSizeQReal*parameter->getD3Qxx(), cudaMemcpyHostToDevice)); + + checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->precursorBC.planeNeighborNT, parameter->getParH(lev)->precursorBC.planeNeighborNT, memSizeQUint, cudaMemcpyHostToDevice)); + checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->precursorBC.planeNeighborNB, parameter->getParH(lev)->precursorBC.planeNeighborNB, memSizeQUint, cudaMemcpyHostToDevice)); + checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->precursorBC.planeNeighborST, parameter->getParH(lev)->precursorBC.planeNeighborST, memSizeQUint, cudaMemcpyHostToDevice)); + checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->precursorBC.planeNeighborSB, parameter->getParH(lev)->precursorBC.planeNeighborSB, memSizeQUint, cudaMemcpyHostToDevice)); + + checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->precursorBC.weightsNT, parameter->getParH(lev)->precursorBC.weightsNT, memSizeQReal, cudaMemcpyHostToDevice)); + checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->precursorBC.weightsNB, parameter->getParH(lev)->precursorBC.weightsNB, memSizeQReal, cudaMemcpyHostToDevice)); + checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->precursorBC.weightsST, parameter->getParH(lev)->precursorBC.weightsST, memSizeQReal, cudaMemcpyHostToDevice)); + checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->precursorBC.weightsSB, parameter->getParH(lev)->precursorBC.weightsSB, memSizeQReal, cudaMemcpyHostToDevice)); +} +void CudaMemoryManager::cudaCopyPrecursorData(int lev) +{ + auto prec = &parameter->getParH(lev)->precursorBC; + size_t memSize 
= prec->numberOfPrecursorNodes*sizeof(real)*prec->numberOfQuantities; + checkCudaErrors( cudaStreamSynchronize(prec->stream) ); + checkCudaErrors( cudaMemcpyAsync(parameter->getParD(lev)->precursorBC.next, prec->next, memSize, cudaMemcpyHostToDevice, prec->stream)) ; +} + + +void CudaMemoryManager::cudaFreePrecursorBC(int lev) +{ + checkCudaErrors( cudaFreeHost( parameter->getParH(lev)->precursorBC.k)); + + checkCudaErrors( cudaFreeHost( parameter->getParH(lev)->precursorBC.q27[0])); + + checkCudaErrors( cudaFreeHost( parameter->getParH(lev)->precursorBC.planeNeighborNT)); + checkCudaErrors( cudaFreeHost( parameter->getParH(lev)->precursorBC.planeNeighborNB)); + checkCudaErrors( cudaFreeHost( parameter->getParH(lev)->precursorBC.planeNeighborST)); + checkCudaErrors( cudaFreeHost( parameter->getParH(lev)->precursorBC.planeNeighborSB)); + + checkCudaErrors( cudaFreeHost( parameter->getParH(lev)->precursorBC.weightsNT)); + checkCudaErrors( cudaFreeHost( parameter->getParH(lev)->precursorBC.weightsNB)); + checkCudaErrors( cudaFreeHost( parameter->getParH(lev)->precursorBC.weightsST)); + checkCudaErrors( cudaFreeHost( parameter->getParH(lev)->precursorBC.weightsSB)); + + checkCudaErrors( cudaFree( parameter->getParD(lev)->precursorBC.k)); + + checkCudaErrors( cudaFree( parameter->getParD(lev)->precursorBC.q27[0])); + + checkCudaErrors( cudaFree( parameter->getParD(lev)->precursorBC.planeNeighborNT)); + checkCudaErrors( cudaFree( parameter->getParD(lev)->precursorBC.planeNeighborNB)); + checkCudaErrors( cudaFree( parameter->getParD(lev)->precursorBC.planeNeighborST)); + checkCudaErrors( cudaFree( parameter->getParD(lev)->precursorBC.planeNeighborSB)); + + checkCudaErrors( cudaFree( parameter->getParD(lev)->precursorBC.weightsNT)); + checkCudaErrors( cudaFree( parameter->getParD(lev)->precursorBC.weightsNB)); + checkCudaErrors( cudaFree( parameter->getParD(lev)->precursorBC.weightsST)); + checkCudaErrors( cudaFree( parameter->getParD(lev)->precursorBC.weightsSB)); +} + 
+void CudaMemoryManager::cudaFreePrecursorData(int lev) +{ + checkCudaErrors( cudaFreeHost( parameter->getParH(lev)->precursorBC.last)); + checkCudaErrors( cudaFreeHost( parameter->getParH(lev)->precursorBC.current)); + checkCudaErrors( cudaFreeHost( parameter->getParH(lev)->precursorBC.next)); + + checkCudaErrors( cudaFree( parameter->getParD(lev)->precursorBC.last)); + checkCudaErrors( cudaFree( parameter->getParD(lev)->precursorBC.current)); + checkCudaErrors( cudaFree( parameter->getParD(lev)->precursorBC.next)); +} //Test roundoff error void CudaMemoryManager::cudaAllocTestRE(int lev, unsigned int size) { @@ -3479,8 +3610,11 @@ void CudaMemoryManager::cudaCopyProbeQuantityArrayHtoD(Probe* probe, int level) } void CudaMemoryManager::cudaCopyProbeQuantityArrayDtoH(Probe* probe, int level) { - checkCudaErrors( cudaMemcpy(probe->getProbeStruct(level)->quantitiesArrayH, probe->getProbeStruct(level)->quantitiesArrayD, probe->getProbeStruct(level)->nArrays*sizeof(real)*probe->getProbeStruct(level)->nPoints, cudaMemcpyDeviceToHost) ); + auto probeStruct = probe->getProbeStruct(level); + + checkCudaErrors( cudaMemcpy(probeStruct->quantitiesArrayH, probeStruct->quantitiesArrayD, probeStruct->nArrays*sizeof(real)*probeStruct->nPoints, cudaMemcpyDeviceToHost) ); } + void CudaMemoryManager::cudaFreeProbeQuantityArray(Probe* probe, int level) { checkCudaErrors( cudaFreeHost(probe->getProbeStruct(level)->quantitiesArrayH) ); @@ -3510,6 +3644,7 @@ void CudaMemoryManager::cudaCopyProbeQuantitiesAndOffsetsDtoH(Probe* probe, int checkCudaErrors( cudaMemcpy(probe->getProbeStruct(level)->quantitiesH, probe->getProbeStruct(level)->quantitiesD, int(Statistic::LAST)*sizeof(bool), cudaMemcpyDeviceToHost) ); checkCudaErrors( cudaMemcpy(probe->getProbeStruct(level)->arrayOffsetsH, probe->getProbeStruct(level)->arrayOffsetsD, int(Statistic::LAST)*sizeof(int), cudaMemcpyDeviceToHost) ); } + void CudaMemoryManager::cudaFreeProbeQuantitiesAndOffsets(Probe* probe, int level) { 
checkCudaErrors( cudaFreeHost(probe->getProbeStruct(level)->quantitiesH) ); @@ -3518,23 +3653,51 @@ void CudaMemoryManager::cudaFreeProbeQuantitiesAndOffsets(Probe* probe, int leve checkCudaErrors( cudaFree (probe->getProbeStruct(level)->arrayOffsetsD) ); } +void CudaMemoryManager::cudaAllocPrecursorWriter(PrecursorWriter* writer, int level) +{ + auto prec = writer->getPrecursorStruct(level); + size_t indSize = prec->nPoints*sizeof(uint); + checkCudaErrors( cudaStreamCreate(&prec->stream) ); + checkCudaErrors( cudaMallocHost((void**) &prec->indicesH, indSize)); + checkCudaErrors( cudaMalloc((void**) &prec->indicesD, indSize)); + size_t dataSize = prec->nPoints*sizeof(real)*prec->nQuantities; + size_t dataSizeH = dataSize * prec->timestepsPerFile; + + checkCudaErrors( cudaMallocHost((void**) &prec->dataH, dataSizeH)); + checkCudaErrors( cudaMallocHost((void**) &prec->bufferH, dataSizeH)); + checkCudaErrors( cudaMalloc((void**) &prec->dataD, dataSize)); + checkCudaErrors( cudaMalloc((void**) &prec->bufferD, dataSize)); + setMemsizeGPU(indSize+2*dataSize, false); +} +void CudaMemoryManager::cudaCopyPrecursorWriterIndicesHtoD(PrecursorWriter* writer, int level) +{ + checkCudaErrors( cudaMemcpy(writer->getPrecursorStruct(level)->indicesD, writer->getPrecursorStruct(level)->indicesH, writer->getPrecursorStruct(level)->nPoints*sizeof(uint), cudaMemcpyHostToDevice) ); +} +void CudaMemoryManager::cudaCopyPrecursorWriterOutputVariablesDtoH(PrecursorWriter* writer, int level) +{ + auto prec = writer->getPrecursorStruct(level); + int sizeTimestep = prec->nPoints*prec->nQuantities; + checkCudaErrors( cudaStreamSynchronize(prec->stream) ); + checkCudaErrors( cudaMemcpyAsync( &prec->bufferH[prec->timestepsBuffered*sizeTimestep], prec->bufferD, sizeof(real)*sizeTimestep, cudaMemcpyDeviceToHost, prec->stream)); +} +void CudaMemoryManager::cudaFreePrecursorWriter(PrecursorWriter* writer, int level) +{ + checkCudaErrors( cudaFreeHost(writer->getPrecursorStruct(level)->indicesH)); + 
checkCudaErrors( cudaFree(writer->getPrecursorStruct(level)->indicesD)); - - - - - - - - + checkCudaErrors( cudaFreeHost(writer->getPrecursorStruct(level)->dataH)); + checkCudaErrors( cudaFreeHost(writer->getPrecursorStruct(level)->bufferH)); + checkCudaErrors( cudaFree(writer->getPrecursorStruct(level)->dataD)); + checkCudaErrors( cudaFree(writer->getPrecursorStruct(level)->bufferD)); +} CudaMemoryManager::CudaMemoryManager(std::shared_ptr<Parameter> parameter) : parameter(parameter) diff --git a/src/gpu/VirtualFluids_GPU/GPU/CudaMemoryManager.h b/src/gpu/VirtualFluids_GPU/GPU/CudaMemoryManager.h index beb87ba639e160cc6be6e036f615dabe80d0b865..7d4b3af414f919c099f06d2387c896a1763f9231 100644 --- a/src/gpu/VirtualFluids_GPU/GPU/CudaMemoryManager.h +++ b/src/gpu/VirtualFluids_GPU/GPU/CudaMemoryManager.h @@ -21,6 +21,8 @@ class PorousMedia; class ActuatorLine; class ActuatorFarm; class Probe; +class VelocitySetter; +class PrecursorWriter; class VIRTUALFLUIDS_GPU_EXPORT CudaMemoryManager { @@ -183,6 +185,13 @@ public: void cudaCopyStressBC(int lev); void cudaFreeStressBC(int lev); + void cudaAllocPrecursorBC(int lev); + void cudaAllocPrecursorData(int lev); + void cudaCopyPrecursorBC(int lev); + void cudaCopyPrecursorData(int lev); + void cudaFreePrecursorBC(int lev); + void cudaFreePrecursorData(int lev); + void cudaAllocWallModel(int lev, bool hasWallModelMonitor); void cudaCopyWallModel(int lev, bool hasWallModelMonitor); void cudaFreeWallModel(int lev, bool hasWallModelMonitor); @@ -436,6 +445,12 @@ public: void cudaCopyProbeQuantitiesAndOffsetsDtoH(Probe* probe, int level); void cudaFreeProbeQuantitiesAndOffsets(Probe* probe, int level); + //Precursor Writer + void cudaAllocPrecursorWriter(PrecursorWriter* writer, int level); + void cudaCopyPrecursorWriterIndicesHtoD(PrecursorWriter* writer, int level); + void cudaCopyPrecursorWriterOutputVariablesDtoH(PrecursorWriter* writer, int level); + void cudaFreePrecursorWriter(PrecursorWriter* writer, int level); + 
private: std::shared_ptr<Parameter> parameter; double memsizeGPU = 0.0; diff --git a/src/gpu/VirtualFluids_GPU/GPU/GPU_Interface.h b/src/gpu/VirtualFluids_GPU/GPU/GPU_Interface.h index ee987ae23402ef304220349db77084cc341ccd5a..ceb70fb123c52c282200137a00522ff2b9905f86 100644 --- a/src/gpu/VirtualFluids_GPU/GPU/GPU_Interface.h +++ b/src/gpu/VirtualFluids_GPU/GPU/GPU_Interface.h @@ -900,6 +900,8 @@ void QPressDevDirDepBot27(unsigned int numberOfThreads, void QPressNoRhoDev27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition); +void QPressZeroRhoOutflowDev27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition); + void QInflowScaleByPressDev27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition); void QPressDevOld27(unsigned int numberOfThreads, @@ -1007,6 +1009,14 @@ void VelSchlaffer27( unsigned int numberOfThreads, unsigned int size_Mat, bool isEvenTimestep); +void QPrecursorDevCompZeroPress(LBMSimulationParameter* parameterDevice, QforPrecursorBoundaryConditions* boundaryCondition, real tRatio, real velocityRatio); + +void PrecursorDevEQ27(LBMSimulationParameter* parameterDevice, QforPrecursorBoundaryConditions* boundaryCondition, real tRatio, real velocityRatio); + +void PrecursorDevDistributions(LBMSimulationParameter* parameterDevice, QforPrecursorBoundaryConditions* boundaryCondition, real tRatio, real velocityRatio); + +void QPrecursorDevDistributions(LBMSimulationParameter* parameterDevice, QforPrecursorBoundaryConditions* boundaryCondition, real tRatio, real velocityRatio); + void QADDev7(unsigned int numberOfThreads, real* DD, real* DD7, diff --git a/src/gpu/VirtualFluids_GPU/GPU/GPU_Kernels.cuh b/src/gpu/VirtualFluids_GPU/GPU/GPU_Kernels.cuh index 94b9704b7ca57df4cd985f5aff9521b8a087b97f..b35e01eb997723eb12f5645857bc230536fe97fe 100644 --- a/src/gpu/VirtualFluids_GPU/GPU/GPU_Kernels.cuh +++ b/src/gpu/VirtualFluids_GPU/GPU/GPU_Kernels.cuh @@ -1080,7 +1080,7 @@ 
__global__ void QPressDeviceDirDepBot27( real* rhoBC, bool isEvenTimestep); __global__ void QPressNoRhoDevice27( real* rhoBC, - real* DD, + real* distributions, int* k_Q, int* k_N, int numberOfBCnodes, @@ -1088,8 +1088,23 @@ __global__ void QPressNoRhoDevice27( real* rhoBC, unsigned int* neighborX, unsigned int* neighborY, unsigned int* neighborZ, - unsigned int size_Mat, - bool isEvenTimestep); + unsigned int numberOfLBnodes, + bool isEvenTimestep, + int direction); + +__global__ void QPressZeroRhoOutflowDevice27( real* rhoBC, + real* distributions, + int* k_Q, + int* k_N, + int numberOfBCnodes, + real om1, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + unsigned int numberOfLBnodes, + bool isEvenTimestep, + int direction, + real densityCorrectionFactor); __global__ void QInflowScaleByPressDevice27( real* rhoBC, real* DD, @@ -1228,6 +1243,103 @@ __global__ void VelSchlaff27( int t, unsigned int size_Mat, bool isEvenTimestep); +__global__ void QPrecursorDeviceCompZeroPress( int* subgridDistanceIndices, + int numberOfBCnodes, + int numberOfPrecursorNodes, + int sizeQ, + real omega, + real* distributions, + real* subgridDistances, + uint* neighborX, + uint* neighborY, + uint* neighborZ, + uint* neighborsNT, + uint* neighborsNB, + uint* neighborsST, + uint* neighborsSB, + real* weightsNT, + real* weightsNB, + real* weightsST, + real* weightsSB, + real* vLast, + real* vCurrent, + real velocityX, + real velocityY, + real velocityZ, + real tRatio, + real velocityRatio, + unsigned long long numberOfLBnodes, + bool isEvenTimestep); + +__global__ void PrecursorDeviceEQ27( int* subgridDistanceIndices, + int numberOfBCnodes, + int numberOfPrecursorNodes, + real omega, + real* distributions, + uint* neighborX, + uint* neighborY, + uint* neighborZ, + uint* neighborsNT, + uint* neighborsNB, + uint* neighborsST, + uint* neighborsSB, + real* weightsNT, + real* weightsNB, + real* weightsST, + real* weightsSB, + real* vLast, + real* vCurrent, + 
real velocityX, + real velocityY, + real velocityZ, + real tRatio, + real velocityRatio, + unsigned long long numberOfLBnodes, + bool isEvenTimestep); + +__global__ void PrecursorDeviceDistributions( int* subgridDistanceIndices, + int numberOfBCNodes, + int numberOfPrecursorNodes, + real* distributions, + uint* neighborX, + uint* neighborY, + uint* neighborZ, + uint* neighborsNT, + uint* neighborsNB, + uint* neighborsST, + uint* neighborsSB, + real* weightsNT, + real* weightsNB, + real* weightsST, + real* weightsSB, + real* fsLast, + real* fsNext, + real tRatio, + unsigned long long numberOfLBnodes, + bool isEvenTimestep); +__global__ void QPrecursorDeviceDistributions( int* subgridDistanceIndices, + real* subgridDistances, + int sizeQ, + int numberOfBCNodes, + int numberOfPrecursorNodes, + real* distributions, + uint* neighborX, + uint* neighborY, + uint* neighborZ, + uint* neighborsNT, + uint* neighborsNB, + uint* neighborsST, + uint* neighborsSB, + real* weightsNT, + real* weightsNB, + real* weightsST, + real* weightsSB, + real* fsLast, + real* fsNext, + real tRatio, + unsigned long long numberOfLBnodes, + bool isEvenTimestep); + //Advection / Diffusion BCs __global__ void QAD7( real* DD, real* DD7, diff --git a/src/gpu/VirtualFluids_GPU/GPU/LBMKernel.cu b/src/gpu/VirtualFluids_GPU/GPU/LBMKernel.cu index 63fc5be0ebe5d4a26d4662ee8c0dddbc3098247a..489eb0a60ddb8bf9e1605a68e6d0f62211e26575 100644 --- a/src/gpu/VirtualFluids_GPU/GPU/LBMKernel.cu +++ b/src/gpu/VirtualFluids_GPU/GPU/LBMKernel.cu @@ -54,22 +54,9 @@ void KernelCasSP27( unsigned int numberOfThreads, int size_Mat, bool EvenOrOdd) { - int Grid = (size_Mat / numberOfThreads)+1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid/Grid1)+1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 grid(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1 ); - - LB_Kernel_Casc_SP_27<<< grid, threads >>>(s9, + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, size_Mat); + + 
LB_Kernel_Casc_SP_27<<< grid.grid, grid.threads >>>(s9, bcMatD, neighborX, neighborY, @@ -90,22 +77,9 @@ void KernelCasSPMS27( unsigned int numberOfThreads, int size_Mat, bool EvenOrOdd) { - int Grid = (size_Mat / numberOfThreads)+1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid/Grid1)+1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 grid(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1 ); - - LB_Kernel_Casc_SP_MS_27<<< grid, threads >>>(s9, + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, size_Mat); + + LB_Kernel_Casc_SP_MS_27<<< grid.grid, grid.threads >>>(s9, bcMatD, neighborX, neighborY, @@ -126,22 +100,9 @@ void KernelCasSPMSOHM27( unsigned int numberOfThreads, int size_Mat, bool EvenOrOdd) { - int Grid = (size_Mat / numberOfThreads)+1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid/Grid1)+1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 grid(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1 ); - - LB_Kernel_Casc_SP_MS_OHM_27<<< grid, threads >>>( s9, + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, size_Mat); + + LB_Kernel_Casc_SP_MS_OHM_27<<< grid.grid, grid.threads >>>( s9, bcMatD, neighborX, neighborY, @@ -165,22 +126,9 @@ void KernelKumCompSRTSP27( real* forces, bool EvenOrOdd) { - int Grid = (size_Mat / numberOfThreads)+1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid/Grid1)+1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 grid(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1 ); - - LB_Kernel_Kum_New_Comp_SRT_SP_27 <<< grid, threads >>>( + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, size_Mat); + + LB_Kernel_Kum_New_Comp_SRT_SP_27 <<< grid.grid, grid.threads >>>( omega, bcMatD, neighborX, @@ -209,22 +157,9 @@ void KernelKum1hSP27( unsigned int numberOfThreads, int size_Mat, bool EvenOrOdd) { - int Grid = (size_Mat / numberOfThreads)+1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = 
(Grid/Grid1)+1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 grid(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1 ); - - LB_Kernel_Kum_1h_SP_27<<< grid, threads >>>(omega, + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, size_Mat); + + LB_Kernel_Kum_1h_SP_27<<< grid.grid, grid.threads >>>(omega, deltaPhi, angularVelocity, bcMatD, @@ -250,22 +185,9 @@ void KernelCascadeSP27( unsigned int numberOfThreads, int size_Mat, bool EvenOrOdd) { - int Grid = (size_Mat / numberOfThreads)+1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid/Grid1)+1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 grid(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1 ); - - LB_Kernel_Cascade_SP_27<<< grid, threads >>>(s9, + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, size_Mat); + + LB_Kernel_Cascade_SP_27<<< grid.grid, grid.threads >>>(s9, bcMatD, neighborX, neighborY, @@ -286,22 +208,10 @@ void KernelKumNewSP27( unsigned int numberOfThreads, int size_Mat, bool EvenOrOdd) { - int Grid = (size_Mat / numberOfThreads)+1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid/Grid1)+1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 grid(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1 ); - - LB_Kernel_Kum_New_SP_27<<< grid, threads >>>(s9, + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, size_Mat); + + + LB_Kernel_Kum_New_SP_27<<< grid.grid, grid.threads >>>(s9, bcMatD, neighborX, neighborY, @@ -329,22 +239,9 @@ void KernelKumNewCompSP27(unsigned int numberOfThreads, //dim3 grid(Grid, 1, 1); //dim3 threads(numberOfThreads, 1, 1 ); - int Grid = (size_Mat / numberOfThreads) + 1; - int Grid1, Grid2; - if (Grid > 512) - { - Grid1 = 512; - Grid2 = (Grid / Grid1) + 1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 grid(Grid1, Grid2, 1); - dim3 threads(numberOfThreads, 1, 1); - - //LB_Kernel_Kum_New_Comp_SP_27<<< grid, threads >>>( s9, + vf::cuda::CudaGrid grid = 
vf::cuda::CudaGrid(numberOfThreads, size_Mat); + + //LB_Kernel_Kum_New_Comp_SP_27<<< grid.grid, grid.threads >>>( s9, // bcMatD, // neighborX, // neighborY, @@ -375,22 +272,10 @@ void CumulantOnePreconditionedErrorDiffusionChimCompSP27(unsigned int numberOfTh //dim3 grid(Grid, 1, 1); //dim3 threads(numberOfThreads, 1, 1 ); - int Grid = (size_Mat / numberOfThreads) + 1; - int Grid1, Grid2; - if (Grid > 512) - { - Grid1 = 512; - Grid2 = (Grid / Grid1) + 1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 grid(Grid1, Grid2, 1); - dim3 threads(numberOfThreads, 1, 1); - - Cumulant_One_preconditioned_errorDiffusion_chim_Comp_SP_27 <<< grid, threads >>>( s9, + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, size_Mat); + + + Cumulant_One_preconditioned_errorDiffusion_chim_Comp_SP_27 <<< grid.grid, grid.threads >>>( s9, bcMatD, neighborX, neighborY, @@ -420,22 +305,10 @@ void CumulantOnePreconditionedChimCompSP27( unsigned int numberOfThreads, //dim3 grid(Grid, 1, 1); //dim3 threads(numberOfThreads, 1, 1 ); - int Grid = (size_Mat / numberOfThreads) + 1; - int Grid1, Grid2; - if (Grid > 512) - { - Grid1 = 512; - Grid2 = (Grid / Grid1) + 1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 grid(Grid1, Grid2, 1); - dim3 threads(numberOfThreads, 1, 1); - - Cumulant_One_preconditioned_chim_Comp_SP_27 <<< grid, threads >>>( s9, + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, size_Mat); + + + Cumulant_One_preconditioned_chim_Comp_SP_27 <<< grid.grid, grid.threads >>>( s9, bcMatD, neighborX, neighborY, @@ -465,22 +338,10 @@ void CumulantOneChimCompSP27(unsigned int numberOfThreads, //dim3 grid(Grid, 1, 1); //dim3 threads(numberOfThreads, 1, 1 ); - int Grid = (size_Mat / numberOfThreads) + 1; - int Grid1, Grid2; - if (Grid > 512) - { - Grid1 = 512; - Grid2 = (Grid / Grid1) + 1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 grid(Grid1, Grid2, 1); - dim3 threads(numberOfThreads, 1, 1); - - Cumulant_One_chim_Comp_SP_27 <<< grid, threads 
>>>( s9, + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, size_Mat); + + + Cumulant_One_chim_Comp_SP_27 <<< grid.grid, grid.threads >>>( s9, bcMatD, neighborX, neighborY, @@ -506,22 +367,10 @@ void KernelKumIsoTestSP27(unsigned int numberOfThreads, int size_Mat, bool EvenOrOdd) { - int Grid = (size_Mat / numberOfThreads)+1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid/Grid1)+1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 grid(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1 ); - - LB_Kernel_Kum_IsoTest_SP_27<<< grid, threads >>>(s9, + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, size_Mat); + + + LB_Kernel_Kum_IsoTest_SP_27<<< grid.grid, grid.threads >>>(s9, bcMatD, neighborX, neighborY, @@ -545,22 +394,10 @@ void KernelKumCompSP27( unsigned int numberOfThreads, int size_Mat, bool EvenOrOdd) { - int Grid = (size_Mat / numberOfThreads)+1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid/Grid1)+1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 grid(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1 ); - - LB_Kernel_Kum_Comp_SP_27<<< grid, threads >>>(s9, + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, size_Mat); + + + LB_Kernel_Kum_Comp_SP_27<<< grid.grid, grid.threads >>>(s9, bcMatD, neighborX, neighborY, @@ -587,22 +424,10 @@ void KernelPMCumOneCompSP27(unsigned int numberOfThreads, unsigned int* nodeIdsPorousMedia, bool EvenOrOdd) { - int Grid = (size_Mat / numberOfThreads) + 1; - int Grid1, Grid2; - if (Grid > 512) - { - Grid1 = 512; - Grid2 = (Grid / Grid1) + 1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 grid(Grid1, Grid2, 1); - dim3 threads(numberOfThreads, 1, 1); - - LB_Kernel_PM_Cum_One_Comp_SP_27 <<< grid, threads >>>(omega, + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, size_Mat); + + + LB_Kernel_PM_Cum_One_Comp_SP_27 <<< grid.grid, grid.threads >>>(omega, neighborX, neighborY, neighborZ, @@ -642,22 +467,10 @@ 
void KernelWaleBySoniMalavCumAA2016CompSP27( //dim3 grid(Grid, 1, 1); //dim3 threads(numberOfThreads, 1, 1 ); - int Grid = (size_Mat / numberOfThreads) + 1; - int Grid1, Grid2; - if (Grid > 512) - { - Grid1 = 512; - Grid2 = (Grid / Grid1) + 1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 grid(Grid1, Grid2, 1); - dim3 threads(numberOfThreads, 1, 1); - - LB_Kernel_WaleBySoniMalav_Cum_AA2016_Comp_SP_27 << < grid, threads >> >( + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, size_Mat); + + + LB_Kernel_WaleBySoniMalav_Cum_AA2016_Comp_SP_27 << < grid.grid, grid.threads >> >( s9, bcMatD, neighborX, @@ -687,22 +500,9 @@ void KernelADincomp7( unsigned int numberOfThreads, int size_Mat, bool EvenOrOdd) { - int Grid = (size_Mat / numberOfThreads)+1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid/Grid1)+1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 grid(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1 ); - - LB_Kernel_AD_Incomp_7<<< grid, threads >>>( diffusivity, + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, size_Mat); + + LB_Kernel_AD_Incomp_7<<< grid.grid, grid.threads >>>( diffusivity, bcMatD, neighborX, neighborY, @@ -725,22 +525,9 @@ void KernelADincomp27( unsigned int numberOfThreads, int size_Mat, bool EvenOrOdd) { - int Grid = (size_Mat / numberOfThreads)+1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid/Grid1)+1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 grid(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1 ); - - LB_Kernel_AD_Incomp_27<<< grid, threads >>>( diffusivity, + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, size_Mat); + + LB_Kernel_AD_Incomp_27<<< grid.grid, grid.threads >>>( diffusivity, bcMatD, neighborX, neighborY, @@ -749,7 +536,7 @@ void KernelADincomp27( unsigned int numberOfThreads, DD27, size_Mat, EvenOrOdd); - getLastCudaError("LB_Kernel_AD_Incomp_27 execution failed"); + 
getLastCudaError("LB_Kernel_AD_Incomp_27 execution failed"); } ////////////////////////////////////////////////////////////////////////// void Init27( int myid, @@ -771,7 +558,7 @@ void Init27( int myid, dim3 threads ( grid_nx, 1, 1 ); dim3 grid ( grid_ny, grid_nz ); // Gitter fuer Kollision und Propagation - LBInit27<<< grid, threads >>> ( myid, + LBInit27<<< grid, threads >>> ( myid, numprocs, u0, geoD, @@ -786,7 +573,7 @@ void Init27( int myid, DD, level, maxlevel); - getLastCudaError("LBInit27 execution failed"); + getLastCudaError("LBInit27 execution failed"); } ////////////////////////////////////////////////////////////////////////// void InitNonEqPartSP27( unsigned int numberOfThreads, @@ -804,22 +591,9 @@ void InitNonEqPartSP27( unsigned int numberOfThreads, real omega, bool EvenOrOdd) { - int Grid = (size_Mat / numberOfThreads)+1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid/Grid1)+1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 grid(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1 ); - - LBInitNonEqPartSP27<<< grid, threads >>>( neighborX, + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, size_Mat); + + LBInitNonEqPartSP27<<< grid.grid, grid.threads >>>( neighborX, neighborY, neighborZ, neighborWSB, @@ -832,7 +606,7 @@ void InitNonEqPartSP27( unsigned int numberOfThreads, DD, omega, EvenOrOdd); - getLastCudaError("LBInitNonEqPartSP27 execution failed"); + getLastCudaError("LBInitNonEqPartSP27 execution failed"); } ////////////////////////////////////////////////////////////////////////// void InitThS7( unsigned int numberOfThreads, @@ -848,22 +622,9 @@ void InitThS7( unsigned int numberOfThreads, real* DD7, bool EvenOrOdd) { - int Grid = (size_Mat / numberOfThreads)+1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid/Grid1)+1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 grid(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1 ); - - InitAD7<<< grid, threads >>>( 
neighborX, + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, size_Mat); + + InitAD7<<< grid.grid, grid.threads >>>( neighborX, neighborY, neighborZ, geoD, @@ -874,7 +635,7 @@ void InitThS7( unsigned int numberOfThreads, size_Mat, DD7, EvenOrOdd); - getLastCudaError("InitAD7 execution failed"); + getLastCudaError("InitAD7 execution failed"); } ////////////////////////////////////////////////////////////////////////// void InitADDev27( unsigned int numberOfThreads, @@ -890,22 +651,9 @@ void InitADDev27( unsigned int numberOfThreads, real* DD27, bool EvenOrOdd) { - int Grid = (size_Mat / numberOfThreads)+1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid/Grid1)+1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 grid(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1 ); - - InitAD27<<< grid, threads >>>(neighborX, + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, size_Mat); + + InitAD27<<< grid.grid, grid.threads >>>(neighborX, neighborY, neighborZ, geoD, @@ -916,7 +664,7 @@ void InitADDev27( unsigned int numberOfThreads, size_Mat, DD27, EvenOrOdd); - getLastCudaError("InitAD27 execution failed"); + getLastCudaError("InitAD27 execution failed"); } ////////////////////////////////////////////////////////////////////////// void PostProcessorF3_2018Fehlberg( @@ -937,22 +685,9 @@ void PostProcessorF3_2018Fehlberg( real* forces, bool EvenOrOdd) { - int Grid = (size_Mat / numberOfThreads) + 1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid / Grid1) + 1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 grid(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1); - - LB_PostProcessor_F3_2018_Fehlberg <<< grid, threads >>> ( omega, + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, size_Mat); + + LB_PostProcessor_F3_2018_Fehlberg <<< grid.grid, grid.threads >>> ( omega, bcMatD, neighborX, neighborY, @@ -967,7 +702,7 @@ void PostProcessorF3_2018Fehlberg( level, forces, EvenOrOdd); 
- getLastCudaError("LB_PostProcessor_F3_2018_Fehlberg execution failed"); + getLastCudaError("LB_PostProcessor_F3_2018_Fehlberg execution failed"); } ////////////////////////////////////////////////////////////////////////// void CalcMac27( real* vxD, @@ -988,7 +723,7 @@ void CalcMac27( real* vxD, dim3 threads ( grid_nx, 1, 1 ); dim3 grid ( grid_ny, grid_nz ); - LBCalcMac27<<< grid, threads >>> ( vxD, + LBCalcMac27<<< grid, threads >>> ( vxD, vyD, vzD, rhoD, @@ -999,7 +734,7 @@ void CalcMac27( real* vxD, size_Mat, DD, isEvenTimestep); - getLastCudaError("LBCalcMac27 execution failed"); + getLastCudaError("LBCalcMac27 execution failed"); } ////////////////////////////////////////////////////////////////////////// void CalcMacSP27( real* vxD, @@ -1016,22 +751,9 @@ void CalcMacSP27( real* vxD, real* DD, bool isEvenTimestep) { - int Grid = (size_Mat / numberOfThreads)+1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid/Grid1)+1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 grid(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1 ); - - LBCalcMacSP27<<< grid, threads >>> ( vxD, + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, size_Mat); + + LBCalcMacSP27<<< grid.grid, grid.threads >>> ( vxD, vyD, vzD, rhoD, @@ -1043,7 +765,7 @@ void CalcMacSP27( real* vxD, size_Mat, DD, isEvenTimestep); - getLastCudaError("LBCalcMacSP27 execution failed"); + getLastCudaError("LBCalcMacSP27 execution failed"); } ////////////////////////////////////////////////////////////////////////// void CalcMacCompSP27( real* vxD, @@ -1060,22 +782,9 @@ void CalcMacCompSP27( real* vxD, real* DD, bool isEvenTimestep) { - int Grid = (size_Mat / numberOfThreads)+1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid/Grid1)+1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 grid(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1 ); - - LBCalcMacCompSP27<<< grid, threads >>> ( vxD, + vf::cuda::CudaGrid grid = 
vf::cuda::CudaGrid(numberOfThreads, size_Mat); + + LBCalcMacCompSP27<<< grid.grid, grid.threads >>> ( vxD, vyD, vzD, rhoD, @@ -1087,7 +796,7 @@ void CalcMacCompSP27( real* vxD, size_Mat, DD, isEvenTimestep); - getLastCudaError("LBCalcMacSP27 execution failed"); + getLastCudaError("LBCalcMacSP27 execution failed"); } ////////////////////////////////////////////////////////////////////////// void CalcMacThS7( real* Conc, @@ -1100,22 +809,9 @@ void CalcMacThS7( real* Conc, real* DD7, bool isEvenTimestep) { - int Grid = (size_Mat / numberOfThreads)+1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid/Grid1)+1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 grid(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1 ); - - CalcConc7<<< grid, threads >>> (Conc, + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, size_Mat); + + CalcConc7<<< grid.grid, grid.threads >>> (Conc, geoD, neighborX, neighborY, @@ -1123,7 +819,7 @@ void CalcMacThS7( real* Conc, size_Mat, DD7, isEvenTimestep); - getLastCudaError("CalcConc7 execution failed"); + getLastCudaError("CalcConc7 execution failed"); } ////////////////////////////////////////////////////////////////////////// void PlaneConcThS7(real* Conc, @@ -1138,22 +834,9 @@ void PlaneConcThS7(real* Conc, real* DD7, bool isEvenTimestep) { - int Grid = (numberOfPointskPC / numberOfThreads)+1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid/Grid1)+1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 grid(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1 ); - - GetPlaneConc7<<< grid, threads >>> ( Conc, + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfPointskPC); + + GetPlaneConc7<<< grid.grid, grid.threads >>> ( Conc, kPC, numberOfPointskPC, geoD, @@ -1163,7 +846,7 @@ void PlaneConcThS7(real* Conc, size_Mat, DD7, isEvenTimestep); - getLastCudaError("GetPlaneConc7 execution failed"); + getLastCudaError("GetPlaneConc7 execution failed"); } 
////////////////////////////////////////////////////////////////////////// void PlaneConcThS27(real* Conc, @@ -1178,22 +861,9 @@ void PlaneConcThS27(real* Conc, real* DD27, bool isEvenTimestep) { - int Grid = (numberOfPointskPC / numberOfThreads)+1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid/Grid1)+1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 grid(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1 ); - - GetPlaneConc27<<< grid, threads >>> ( Conc, + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfPointskPC); + + GetPlaneConc27<<< grid.grid, grid.threads >>> ( Conc, kPC, numberOfPointskPC, geoD, @@ -1203,7 +873,7 @@ void PlaneConcThS27(real* Conc, size_Mat, DD27, isEvenTimestep); - getLastCudaError("GetPlaneConc27 execution failed"); + getLastCudaError("GetPlaneConc27 execution failed"); } ////////////////////////////////////////////////////////////////////////// void CalcConcentration27( unsigned int numberOfThreads, @@ -1216,22 +886,9 @@ void CalcConcentration27( unsigned int numberOfThreads, real* DD27, bool isEvenTimestep) { - int Grid = (size_Mat / numberOfThreads)+1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid/Grid1)+1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 grid(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1 ); - - CalcConc27<<< grid, threads >>> ( Conc, + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, size_Mat); + + CalcConc27<<< grid.grid, grid.threads >>> ( Conc, geoD, neighborX, neighborY, @@ -1239,7 +896,7 @@ void CalcConcentration27( unsigned int numberOfThreads, size_Mat, DD27, isEvenTimestep); - getLastCudaError("CalcConc27 execution failed"); + getLastCudaError("CalcConc27 execution failed"); } ////////////////////////////////////////////////////////////////////////// void CalcMedSP27( real* vxD, @@ -1256,22 +913,9 @@ void CalcMedSP27( real* vxD, real* DD, bool isEvenTimestep) { - int Grid = (size_Mat / numberOfThreads)+1; - 
int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid/Grid1)+1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 grid(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1 ); - - LBCalcMedSP27<<< grid, threads >>> ( vxD, + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, size_Mat); + + LBCalcMedSP27<<< grid.grid, grid.threads >>> ( vxD, vyD, vzD, rhoD, @@ -1283,7 +927,7 @@ void CalcMedSP27( real* vxD, size_Mat, DD, isEvenTimestep); - getLastCudaError("LBCalcMedSP27 execution failed"); + getLastCudaError("LBCalcMedSP27 execution failed"); } ////////////////////////////////////////////////////////////////////////// void CalcMedCompSP27( real* vxD, @@ -1300,22 +944,9 @@ void CalcMedCompSP27( real* vxD, real* DD, bool isEvenTimestep) { - int Grid = (size_Mat / numberOfThreads)+1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid/Grid1)+1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 grid(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1 ); - - LBCalcMedCompSP27<<< grid, threads >>> ( vxD, + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, size_Mat); + + LBCalcMedCompSP27<<< grid.grid, grid.threads >>> ( vxD, vyD, vzD, rhoD, @@ -1327,7 +958,7 @@ void CalcMedCompSP27( real* vxD, size_Mat, DD, isEvenTimestep); - getLastCudaError("LBCalcMedSP27 execution failed"); + getLastCudaError("LBCalcMedSP27 execution failed"); } ////////////////////////////////////////////////////////////////////////// void CalcMedCompAD27( @@ -1347,22 +978,9 @@ void CalcMedCompAD27( real* DD_AD, bool isEvenTimestep) { - int Grid = (size_Mat / numberOfThreads) + 1; - int Grid1, Grid2; - if (Grid > 512) - { - Grid1 = 512; - Grid2 = (Grid / Grid1) + 1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 grid(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1); - - LBCalcMedCompAD27 <<< grid, threads >>> ( + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, size_Mat); + + LBCalcMedCompAD27 <<< grid.grid, 
grid.threads >>> ( vxD, vyD, vzD, @@ -1394,22 +1012,9 @@ void CalcMacMedSP27( real* vxD, unsigned int numberOfThreads, bool isEvenTimestep) { - int Grid = (size_Mat / numberOfThreads)+1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid/Grid1)+1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 grid(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1 ); - - LBCalcMacMedSP27<<< grid, threads >>> ( vxD, + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, size_Mat); + + LBCalcMacMedSP27<<< grid.grid, grid.threads >>> ( vxD, vyD, vzD, rhoD, @@ -1421,7 +1026,7 @@ void CalcMacMedSP27( real* vxD, tdiff, size_Mat, isEvenTimestep); - getLastCudaError("LBCalcMacMedSP27 execution failed"); + getLastCudaError("LBCalcMacMedSP27 execution failed"); } ////////////////////////////////////////////////////////////////////////// void ResetMedianValuesSP27( @@ -1434,22 +1039,10 @@ void ResetMedianValuesSP27( unsigned int numberOfThreads, bool isEvenTimestep) { - int Grid = (size_Mat / numberOfThreads) + 1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid / Grid1) + 1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 grid(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1); - - LBResetMedianValuesSP27 << < grid, threads >> > ( + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, size_Mat); + + + LBResetMedianValuesSP27 << < grid.grid, grid.threads >> > ( vxD, vyD, vzD, @@ -1471,22 +1064,9 @@ void ResetMedianValuesAD27( unsigned int numberOfThreads, bool isEvenTimestep) { - int Grid = (size_Mat / numberOfThreads) + 1; - int Grid1, Grid2; - if (Grid > 512) - { - Grid1 = 512; - Grid2 = (Grid / Grid1) + 1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 grid(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1); - - LBResetMedianValuesAD27 << < grid, threads >> > ( + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, size_Mat); + + LBResetMedianValuesAD27 << < grid.grid, grid.threads >> > ( vxD, 
vyD, vzD, @@ -1512,22 +1092,9 @@ void Calc2ndMomentsIncompSP27(real* kxyFromfcNEQ, real* DD, bool isEvenTimestep) { - int Grid = (size_Mat / numberOfThreads)+1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid/Grid1)+1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 grid(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1 ); - - LBCalc2ndMomentsIncompSP27<<< grid, threads >>> ( kxyFromfcNEQ, + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, size_Mat); + + LBCalc2ndMomentsIncompSP27<<< grid.grid, grid.threads >>> ( kxyFromfcNEQ, kyzFromfcNEQ, kxzFromfcNEQ, kxxMyyFromfcNEQ, @@ -1539,7 +1106,7 @@ void Calc2ndMomentsIncompSP27(real* kxyFromfcNEQ, size_Mat, DD, isEvenTimestep); - getLastCudaError("LBCalc2ndMomentsIncompSP27 execution failed"); + getLastCudaError("LBCalc2ndMomentsIncompSP27 execution failed"); } ////////////////////////////////////////////////////////////////////////// void Calc2ndMomentsCompSP27( real* kxyFromfcNEQ, @@ -1556,22 +1123,9 @@ void Calc2ndMomentsCompSP27( real* kxyFromfcNEQ, real* DD, bool isEvenTimestep) { - int Grid = (size_Mat / numberOfThreads)+1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid/Grid1)+1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 grid(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1 ); - - LBCalc2ndMomentsCompSP27<<< grid, threads >>> (kxyFromfcNEQ, + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, size_Mat); + + LBCalc2ndMomentsCompSP27<<< grid.grid, grid.threads >>> (kxyFromfcNEQ, kyzFromfcNEQ, kxzFromfcNEQ, kxxMyyFromfcNEQ, @@ -1583,7 +1137,7 @@ void Calc2ndMomentsCompSP27( real* kxyFromfcNEQ, size_Mat, DD, isEvenTimestep); - getLastCudaError("LBCalc2ndMomentsCompSP27 execution failed"); + getLastCudaError("LBCalc2ndMomentsCompSP27 execution failed"); } ////////////////////////////////////////////////////////////////////////// void Calc3rdMomentsIncompSP27(real* CUMbbb, @@ -1602,22 +1156,9 @@ void 
Calc3rdMomentsIncompSP27(real* CUMbbb, real* DD, bool isEvenTimestep) { - int Grid = (size_Mat / numberOfThreads)+1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid/Grid1)+1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 grid(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1 ); - - LBCalc3rdMomentsIncompSP27<<< grid, threads >>> ( CUMbbb, + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, size_Mat); + + LBCalc3rdMomentsIncompSP27<<< grid.grid, grid.threads >>> ( CUMbbb, CUMabc, CUMbac, CUMbca, @@ -1631,7 +1172,7 @@ void Calc3rdMomentsIncompSP27(real* CUMbbb, DD, size_Mat, isEvenTimestep); - getLastCudaError("LBCalc3rdMomentsIncompSP27 execution failed"); + getLastCudaError("LBCalc3rdMomentsIncompSP27 execution failed"); } ////////////////////////////////////////////////////////////////////////// void Calc3rdMomentsCompSP27( real* CUMbbb, @@ -1650,22 +1191,9 @@ void Calc3rdMomentsCompSP27( real* CUMbbb, real* DD, bool isEvenTimestep) { - int Grid = (size_Mat / numberOfThreads)+1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid/Grid1)+1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 grid(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1 ); - - LBCalc3rdMomentsCompSP27<<< grid, threads >>> (CUMbbb, + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, size_Mat); + + LBCalc3rdMomentsCompSP27<<< grid.grid, grid.threads >>> (CUMbbb, CUMabc, CUMbac, CUMbca, @@ -1679,7 +1207,7 @@ void Calc3rdMomentsCompSP27( real* CUMbbb, DD, size_Mat, isEvenTimestep); - getLastCudaError("LBCalc3rdMomentsCompSP27 execution failed"); + getLastCudaError("LBCalc3rdMomentsCompSP27 execution failed"); } ////////////////////////////////////////////////////////////////////////// void CalcHigherMomentsIncompSP27(real* CUMcbb, @@ -1701,22 +1229,9 @@ void CalcHigherMomentsIncompSP27(real* CUMcbb, real* DD, bool isEvenTimestep) { - int Grid = (size_Mat / numberOfThreads)+1; - int Grid1, Grid2; - if (Grid>512) - 
{ - Grid1 = 512; - Grid2 = (Grid/Grid1)+1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 grid(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1 ); - - LBCalcHigherMomentsIncompSP27<<< grid, threads >>> (CUMcbb, + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, size_Mat); + + LBCalcHigherMomentsIncompSP27<<< grid.grid, grid.threads >>> (CUMcbb, CUMbcb, CUMbbc, CUMcca, @@ -1733,7 +1248,7 @@ void CalcHigherMomentsIncompSP27(real* CUMcbb, DD, size_Mat, isEvenTimestep); - getLastCudaError("LBCalcHigherMomentsIncompSP27 execution failed"); + getLastCudaError("LBCalcHigherMomentsIncompSP27 execution failed"); } ////////////////////////////////////////////////////////////////////////// void CalcHigherMomentsCompSP27( real* CUMcbb, @@ -1755,22 +1270,9 @@ void CalcHigherMomentsCompSP27( real* CUMcbb, real* DD, bool isEvenTimestep) { - int Grid = (size_Mat / numberOfThreads)+1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid/Grid1)+1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 grid(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1 ); - - LBCalcHigherMomentsCompSP27<<< grid, threads >>> ( CUMcbb, + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, size_Mat); + + LBCalcHigherMomentsCompSP27<<< grid.grid, grid.threads >>> ( CUMcbb, CUMbcb, CUMbbc, CUMcca, @@ -1787,7 +1289,7 @@ void CalcHigherMomentsCompSP27( real* CUMcbb, DD, size_Mat, isEvenTimestep); - getLastCudaError("LBCalcHigherMomentsCompSP27 execution failed"); + getLastCudaError("LBCalcHigherMomentsCompSP27 execution failed"); } ////////////////////////////////////////////////////////////////////////// void LBCalcMeasurePoints27(real* vxMP, @@ -1807,22 +1309,9 @@ void LBCalcMeasurePoints27(real* vxMP, unsigned int numberOfThreads, bool isEvenTimestep) { - int Grid = (numberOfPointskMP / numberOfThreads)+1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid/Grid1)+1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 
grid(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1 ); - - LBCalcMeasurePoints<<< grid, threads >>> (vxMP, + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfPointskMP); + + LBCalcMeasurePoints<<< grid.grid, grid.threads >>> (vxMP, vyMP, vzMP, rhoMP, @@ -1837,7 +1326,7 @@ void LBCalcMeasurePoints27(real* vxMP, size_Mat, DD, isEvenTimestep); - getLastCudaError("LBCalcMeasurePoints execution failed"); + getLastCudaError("LBCalcMeasurePoints execution failed"); } ////////////////////////////////////////////////////////////////////////// void BcPress27( int nx, @@ -1853,10 +1342,10 @@ void BcPress27( int nx, unsigned int size_Mat, bool isEvenTimestep) { - dim3 threads ( grid_nx, 1, 1 ); - dim3 grid ( grid_ny, 1 ); + dim3 threads ( grid_nx, 1, 1 ); + dim3 grid ( grid_ny, 1 ); - LB_BC_Press_East27<<< grid, threads >>> ( nx, + LB_BC_Press_East27<<< grid, threads >>> ( nx, ny, tz, bcMatD, @@ -1866,7 +1355,7 @@ void BcPress27( int nx, DD, size_Mat, isEvenTimestep); - getLastCudaError("LB_BC_Press_East27 execution failed"); + getLastCudaError("LB_BC_Press_East27 execution failed"); } ////////////////////////////////////////////////////////////////////////// void BcVel27(int nx, @@ -1885,10 +1374,10 @@ void BcVel27(int nx, real u0x, real om) { - dim3 threads ( grid_nx, 1, 1 ); - dim3 grid ( grid_ny, 1 ); + dim3 threads ( grid_nx, 1, 1 ); + dim3 grid ( grid_ny, 1 ); - LB_BC_Vel_West_27<<< grid, threads >>> ( nx, + LB_BC_Vel_West_27<<< grid, threads >>> ( nx, ny, nz, itz, @@ -1903,7 +1392,7 @@ void BcVel27(int nx, grid_nx, grid_ny, om); - getLastCudaError("LB_BC_Vel_West_27 execution failed"); + getLastCudaError("LB_BC_Vel_West_27 execution failed"); } ////////////////////////////////////////////////////////////////////////// void QADPressDev7( unsigned int numberOfThreads, @@ -1922,22 +1411,9 @@ void QADPressDev7( unsigned int numberOfThreads, unsigned int size_Mat, bool isEvenTimestep) { - int Grid = (numberOfBCnodes / numberOfThreads)+1; - int 
Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid/Grid1)+1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 gridQ(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1 ); - - QADPress7<<< gridQ, threads >>>( DD, + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfBCnodes); + + QADPress7<<< grid.grid, grid.threads >>>( DD, DD7, temp, velo, @@ -1951,7 +1427,7 @@ void QADPressDev7( unsigned int numberOfThreads, neighborZ, size_Mat, isEvenTimestep); - getLastCudaError("QADPress7 execution failed"); + getLastCudaError("QADPress7 execution failed"); } ////////////////////////////////////////////////////////////////////////// void QADPressDev27(unsigned int numberOfThreads, @@ -1970,22 +1446,9 @@ void QADPressDev27(unsigned int numberOfThreads, unsigned int size_Mat, bool isEvenTimestep) { - int Grid = (numberOfBCnodes / numberOfThreads)+1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid/Grid1)+1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 gridQ(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1 ); - - QADPress27<<< gridQ, threads >>>( DD, + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfBCnodes); + + QADPress27<<< grid.grid, grid.threads >>>( DD, DD27, temp, velo, @@ -1999,7 +1462,7 @@ void QADPressDev27(unsigned int numberOfThreads, neighborZ, size_Mat, isEvenTimestep); - getLastCudaError("QADPress27 execution failed"); + getLastCudaError("QADPress27 execution failed"); } ////////////////////////////////////////////////////////////////////////// void QADPressNEQNeighborDev27( @@ -2017,22 +1480,9 @@ void QADPressNEQNeighborDev27( ) { - int Grid = (numberOfBCnodes / numberOfThreads)+1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid/Grid1)+1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 gridQ(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1 ); - - QADPressNEQNeighbor27<<< gridQ, threads >>>( + vf::cuda::CudaGrid grid = 
vf::cuda::CudaGrid(numberOfThreads, numberOfBCnodes); + + QADPressNEQNeighbor27<<< grid.grid, grid.threads >>>( DD, DD27, k_Q, @@ -2044,7 +1494,7 @@ void QADPressNEQNeighborDev27( size_Mat, isEvenTimestep ); - getLastCudaError("QADPressNEQNeighbor27 execution failed"); + getLastCudaError("QADPressNEQNeighbor27 execution failed"); } ////////////////////////////////////////////////////////////////////////// void QADVelDev7(unsigned int numberOfThreads, @@ -2063,22 +1513,9 @@ void QADVelDev7(unsigned int numberOfThreads, unsigned int size_Mat, bool isEvenTimestep) { - int Grid = (numberOfBCnodes / numberOfThreads)+1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid/Grid1)+1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 gridQ(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1 ); - - QADVel7<<< gridQ, threads >>> ( + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfBCnodes); + + QADVel7<<< grid.grid, grid.threads >>> ( DD, DD7, temp, @@ -2093,7 +1530,7 @@ void QADVelDev7(unsigned int numberOfThreads, neighborZ, size_Mat, isEvenTimestep); - getLastCudaError("QADVel7 execution failed"); + getLastCudaError("QADVel7 execution failed"); } ////////////////////////////////////////////////////////////////////////// void QADVelDev27( unsigned int numberOfThreads, @@ -2112,22 +1549,9 @@ void QADVelDev27( unsigned int numberOfThreads, unsigned int size_Mat, bool isEvenTimestep) { - int Grid = (numberOfBCnodes / numberOfThreads)+1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid/Grid1)+1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 gridQ(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1 ); - - QADVel27<<< gridQ, threads >>> ( DD, + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfBCnodes); + + QADVel27<<< grid.grid, grid.threads >>> ( DD, DD27, temp, velo, @@ -2159,22 +1583,9 @@ void QADDev7(unsigned int numberOfThreads, unsigned int size_Mat, bool isEvenTimestep) { - int 
Grid = (numberOfBCnodes / numberOfThreads)+1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid/Grid1)+1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 gridQ(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1 ); - - QAD7<<< gridQ, threads >>> ( DD, + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfBCnodes); + + QAD7<<< grid.grid, grid.threads >>> ( DD, DD7, temp, diffusivity, @@ -2242,11 +1653,9 @@ void ADSlipVelDevComp( uint size_Mat, bool isEvenTimestep) { - int Grid = (numberOfBCnodes / numberOfThreads) + 1; - dim3 gridQ(Grid, 1, 1); - dim3 threads(numberOfThreads, 1, 1); + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfBCnodes); - AD_SlipVelDeviceComp << < gridQ, threads >> > ( + AD_SlipVelDeviceComp << < grid.grid, grid.threads >> > ( normalX, normalY, normalZ, @@ -2280,22 +1689,9 @@ void QADDirichletDev27( unsigned int numberOfThreads, unsigned int size_Mat, bool isEvenTimestep) { - int Grid = (numberOfBCnodes / numberOfThreads)+1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid/Grid1)+1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 gridQ(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1 ); - - QADDirichlet27<<< gridQ, threads >>> ( + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfBCnodes); + + QADDirichlet27<<< grid.grid, grid.threads >>> ( DD, DD27, temp, @@ -2327,22 +1723,9 @@ void QADBBDev27(unsigned int numberOfThreads, unsigned int size_Mat, bool isEvenTimestep) { - int Grid = (numberOfBCnodes / numberOfThreads)+1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid/Grid1)+1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 gridQ(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1 ); - - QADBB27<<< gridQ, threads >>> ( DD, + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfBCnodes); + + QADBB27<<< grid.grid, grid.threads >>> ( DD, DD27, temp, diffusivity, @@ -2373,22 +1756,9 
@@ void QNoSlipADincompDev7(unsigned int numberOfThreads, unsigned int size_Mat, bool isEvenTimestep) { - int Grid = (numberOfBCnodes / numberOfThreads)+1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid/Grid1)+1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 gridQ(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1 ); - - QNoSlipADincomp7<<< gridQ, threads >>> ( + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfBCnodes); + + QNoSlipADincomp7<<< grid.grid, grid.threads >>> ( DD, DD7, temp, @@ -2420,22 +1790,9 @@ void QNoSlipADincompDev27( unsigned int numberOfThreads, unsigned int size_Mat, bool isEvenTimestep) { - int Grid = (numberOfBCnodes / numberOfThreads)+1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid/Grid1)+1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 gridQ(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1 ); - - QNoSlipADincomp27<<< gridQ, threads >>> ( + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfBCnodes); + + QNoSlipADincomp27<<< grid.grid, grid.threads >>> ( DD, DD27, temp, @@ -2468,24 +1825,10 @@ void QADVeloIncompDev7( unsigned int numberOfThreads, unsigned int size_Mat, bool isEvenTimestep) { - int Grid = (numberOfBCnodes / numberOfThreads)+1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid/Grid1)+1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 gridQ(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1 ); - - QADVeloIncomp7<<< gridQ, threads >>> ( - DD, - DD7, + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfBCnodes); + + QADVeloIncomp7<<< grid.grid, grid.threads >>> ( DD, + DD7, temp, velo, diffusivity, @@ -2517,22 +1860,9 @@ void QADVeloIncompDev27( unsigned int numberOfThreads, unsigned int size_Mat, bool isEvenTimestep) { - int Grid = (numberOfBCnodes / numberOfThreads)+1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid/Grid1)+1; - } - else - { - 
Grid1 = 1; - Grid2 = Grid; - } - dim3 gridQ(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1 ); - - QADVeloIncomp27<<< gridQ, threads >>> ( + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfBCnodes); + + QADVeloIncomp27<<< grid.grid, grid.threads >>> ( DD, DD27, temp, @@ -2566,22 +1896,9 @@ void QADPressIncompDev7( unsigned int numberOfThreads, unsigned int size_Mat, bool isEvenTimestep) { - int Grid = (numberOfBCnodes / numberOfThreads)+1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid/Grid1)+1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 gridQ(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1 ); - - QADPressIncomp7<<< gridQ, threads >>>( + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfBCnodes); + + QADPressIncomp7<<< grid.grid, grid.threads >>>( DD, DD7, temp, @@ -2615,24 +1932,10 @@ void QADPressIncompDev27( unsigned int numberOfThreads, unsigned int size_Mat, bool isEvenTimestep) { - int Grid = (numberOfBCnodes / numberOfThreads)+1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid/Grid1)+1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 gridQ(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1 ); - - QADPressIncomp27<<< gridQ, threads >>>( - DD, - DD27, + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfBCnodes); + + QADPressIncomp27<<< grid.grid, grid.threads >>>(DD, + DD27, temp, velo, diffusivity, @@ -2701,22 +2004,9 @@ void QDevCompThinWalls27(unsigned int numberOfThreads, unsigned int size_Mat, bool isEvenTimestep) { - int Grid = (numberOfBCnodes / numberOfThreads)+1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid/Grid1)+1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 gridQ(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1 ); - - QDeviceCompThinWallsPartOne27 <<< gridQ, threads >>> (DD, + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfBCnodes); + + 
QDeviceCompThinWallsPartOne27 <<< grid.grid, grid.threads >>> (DD, k_Q, QQ, numberOfBCnodes, @@ -2728,7 +2018,7 @@ void QDevCompThinWalls27(unsigned int numberOfThreads, isEvenTimestep); getLastCudaError("QDeviceCompThinWallsPartOne27 execution failed"); - QThinWallsPartTwo27 <<< gridQ, threads >>> ( DD, + QThinWallsPartTwo27 <<< grid.grid, grid.threads >>> ( DD, k_Q, QQ, numberOfBCnodes, @@ -2774,22 +2064,9 @@ void QDevIncompHighNu27( unsigned int numberOfThreads, unsigned int size_Mat, bool isEvenTimestep) { - int Grid = (numberOfBCnodes / numberOfThreads)+1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid/Grid1)+1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 gridQ(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1 ); - - QDeviceIncompHighNu27<<< gridQ, threads >>> ( + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfBCnodes); + + QDeviceIncompHighNu27<<< grid.grid, grid.threads >>> ( DD, k_Q, QQ, @@ -2815,22 +2092,9 @@ void QDevCompHighNu27( unsigned int numberOfThreads, unsigned int size_Mat, bool isEvenTimestep) { - int Grid = (numberOfBCnodes / numberOfThreads)+1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid/Grid1)+1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 gridQ(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1 ); - - QDeviceCompHighNu27<<< gridQ, threads >>> ( + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfBCnodes); + + QDeviceCompHighNu27<<< grid.grid, grid.threads >>> ( DD, k_Q, QQ, @@ -2880,22 +2144,9 @@ void QVelDeviceCouette27(unsigned int numberOfThreads, unsigned int size_Mat, bool isEvenTimestep) { - int Grid = (numberOfBCnodes / numberOfThreads)+1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid/Grid1)+1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 gridQ(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1 ); - - QVelDevCouette27<<< gridQ, threads >>> ( vx, + vf::cuda::CudaGrid grid = 
vf::cuda::CudaGrid(numberOfThreads, numberOfBCnodes); + + QVelDevCouette27<<< grid.grid, grid.threads >>> ( vx, vy, vz, DD, @@ -2933,22 +2184,9 @@ void QVelDevice1h27( unsigned int numberOfThreads, unsigned int size_Mat, bool isEvenTimestep) { - int Grid = (numberOfBCnodes / numberOfThreads)+1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid/Grid1)+1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 gridQ(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1 ); - - QVelDev1h27<<< gridQ, threads >>> (nx, + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfBCnodes); + + QVelDev1h27<<< grid.grid, grid.threads >>> (nx, ny, vx, vy, @@ -3010,22 +2248,9 @@ void QVelDevCompPlusSlip27(unsigned int numberOfThreads, unsigned int size_Mat, bool isEvenTimestep) { - int Grid = (numberOfBCnodes / numberOfThreads)+1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid/Grid1)+1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 gridQ(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1 ); - - QVelDeviceCompPlusSlip27<<< gridQ, threads >>> ( + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfBCnodes); + + QVelDeviceCompPlusSlip27<<< grid.grid, grid.threads >>> ( vx, vy, vz, @@ -3081,22 +2306,9 @@ void QVelDevCompThinWalls27(unsigned int numberOfThreads, unsigned int size_Mat, bool isEvenTimestep) { - int Grid = (numberOfBCnodes / numberOfThreads)+1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid/Grid1)+1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 gridQ(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1 ); - - QVelDeviceCompThinWallsPartOne27<<< gridQ, threads >>> (vx, + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfBCnodes); + + QVelDeviceCompThinWallsPartOne27<<< grid.grid, grid.threads >>> (vx, vy, vz, DD, @@ -3111,7 +2323,7 @@ void QVelDevCompThinWalls27(unsigned int numberOfThreads, isEvenTimestep); 
getLastCudaError("QVelDeviceCompThinWallsPartOne27 execution failed"); - QThinWallsPartTwo27 <<< gridQ, threads >>> ( + QThinWallsPartTwo27 <<< grid.grid, grid.threads >>> ( DD, k_Q, QQ, @@ -3163,22 +2375,9 @@ void QVelDevIncompHighNu27(unsigned int numberOfThreads, unsigned int size_Mat, bool isEvenTimestep) { - int Grid = (numberOfBCnodes / numberOfThreads)+1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid/Grid1)+1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 gridQ(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1 ); - - QVelDeviceIncompHighNu27<<< gridQ, threads >>> ( + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfBCnodes); + + QVelDeviceIncompHighNu27<<< grid.grid, grid.threads >>> ( vx, vy, vz, @@ -3210,22 +2409,9 @@ void QVelDevCompHighNu27( unsigned int numberOfThreads, unsigned int size_Mat, bool isEvenTimestep) { - int Grid = (numberOfBCnodes / numberOfThreads)+1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid/Grid1)+1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 gridQ(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1 ); - - QVelDeviceCompHighNu27<<< gridQ, threads >>> ( + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfBCnodes); + + QVelDeviceCompHighNu27<<< grid.grid, grid.threads >>> ( vx, vy, vz, @@ -3256,22 +2442,9 @@ void QVeloDevEQ27(unsigned int numberOfThreads, unsigned int size_Mat, bool isEvenTimestep) { - int Grid = (numberOfBCnodes / numberOfThreads)+1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid/Grid1)+1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 gridQ(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1 ); - - QVeloDeviceEQ27<<< gridQ, threads >>> (VeloX, + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfBCnodes); + + QVeloDeviceEQ27<<< grid.grid, grid.threads >>> (VeloX, VeloY, VeloZ, DD, @@ -3301,22 +2474,9 @@ void QVeloStreetDevEQ27( uint size_Mat, bool 
isEvenTimestep) { - int Grid = (numberOfStreetNodes / numberOfThreads) + 1; - int Grid1, Grid2; - if (Grid > 512) - { - Grid1 = 512; - Grid2 = (Grid / Grid1) + 1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 gridQ(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1); - - QVeloStreetDeviceEQ27 << < gridQ, threads >> > ( + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfStreetNodes); + + QVeloStreetDeviceEQ27 << < grid.grid, grid.threads >> > ( veloXfraction, veloYfraction, naschVelo, @@ -3355,7 +2515,7 @@ void QSlipDevCompTurbulentViscosity27(LBMSimulationParameter* parameterDevice, Q { dim3 grid = vf::cuda::getCudaGrid( parameterDevice->numberofthreads, boundaryCondition->numberOfBCnodes); dim3 threads(parameterDevice->numberofthreads, 1, 1 ); - + QSlipDeviceComp27TurbViscosity<<< grid, threads >>> ( parameterDevice->distributions.f[0], boundaryCondition->k, @@ -3395,7 +2555,7 @@ void QSlipDevComp27(LBMSimulationParameter* parameterDevice, QforBoundaryConditi { dim3 grid = vf::cuda::getCudaGrid( parameterDevice->numberofthreads, boundaryCondition->numberOfBCnodes); dim3 threads(parameterDevice->numberofthreads, 1, 1 ); - + QSlipDeviceComp27<<< grid, threads >>> ( parameterDevice->distributions.f[0], boundaryCondition->k, @@ -3444,22 +2604,9 @@ void QSlipGeomDevComp27(unsigned int numberOfThreads, unsigned int size_Mat, bool isEvenTimestep) { - int Grid = (numberOfBCnodes / numberOfThreads)+1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid/Grid1)+1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 gridQ(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1 ); - - QSlipGeomDeviceComp27<<< gridQ, threads >>> (DD, + vf::cuda::CudaGrid grid(numberOfThreads, numberOfBCnodes); + + QSlipGeomDeviceComp27<<< grid.grid, grid.threads >>> (DD, k_Q, QQ, numberOfBCnodes, @@ -3472,7 +2619,7 @@ void QSlipGeomDevComp27(unsigned int numberOfThreads, neighborZ, size_Mat, isEvenTimestep); - 
getLastCudaError("QSlipGeomDeviceComp27 execution failed"); + getLastCudaError("QSlipGeomDeviceComp27 execution failed"); } ////////////////////////////////////////////////////////////////////////// void QSlipNormDevComp27(unsigned int numberOfThreads, @@ -3490,22 +2637,9 @@ void QSlipNormDevComp27(unsigned int numberOfThreads, unsigned int size_Mat, bool isEvenTimestep) { - int Grid = (numberOfBCnodes / numberOfThreads)+1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid/Grid1)+1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 gridQ(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1 ); - - QSlipNormDeviceComp27<<< gridQ, threads >>> (DD, + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfBCnodes); + + QSlipNormDeviceComp27<<< grid.grid, grid.threads >>> (DD, k_Q, QQ, numberOfBCnodes, @@ -3676,22 +2810,9 @@ void QPressDevAntiBB27( unsigned int numberOfThreads, unsigned int size_Mat, bool isEvenTimestep) { - int Grid = (numberOfBCnodes / numberOfThreads)+1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid/Grid1)+1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 gridQ(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1 ); - - QPressDeviceAntiBB27<<< gridQ, threads >>>( rhoBC, + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfBCnodes); + + QPressDeviceAntiBB27<<< grid.grid, grid.threads >>>( rhoBC, vx, vy, vz, @@ -3705,7 +2826,7 @@ void QPressDevAntiBB27( unsigned int numberOfThreads, neighborZ, size_Mat, isEvenTimestep); - getLastCudaError("QPressDeviceAntiBB27 execution failed"); + getLastCudaError("QPressDeviceAntiBB27 execution failed"); } ////////////////////////////////////////////////////////////////////////// void QPressDevFixBackflow27( unsigned int numberOfThreads, @@ -3720,22 +2841,9 @@ void QPressDevFixBackflow27( unsigned int numberOfThreads, unsigned int size_Mat, bool isEvenTimestep) { - int Grid = (numberOfBCnodes / numberOfThreads)+1; - int Grid1, 
Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid/Grid1)+1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 gridQ(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1 ); - - QPressDeviceFixBackflow27<<< gridQ, threads >>> ( rhoBC, + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfBCnodes); + + QPressDeviceFixBackflow27<<< grid.grid, grid.threads >>> ( rhoBC, DD, k_Q, numberOfBCnodes, @@ -3745,7 +2853,7 @@ void QPressDevFixBackflow27( unsigned int numberOfThreads, neighborZ, size_Mat, isEvenTimestep); - getLastCudaError("QPressDeviceFixBackflow27 execution failed"); + getLastCudaError("QPressDeviceFixBackflow27 execution failed"); } ////////////////////////////////////////////////////////////////////////// void QPressDevDirDepBot27( unsigned int numberOfThreads, @@ -3760,22 +2868,9 @@ void QPressDevDirDepBot27( unsigned int numberOfThreads, unsigned int size_Mat, bool isEvenTimestep) { - int Grid = (numberOfBCnodes / numberOfThreads)+1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid/Grid1)+1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 gridQ(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1 ); - - QPressDeviceDirDepBot27<<< gridQ, threads >>> ( rhoBC, + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfBCnodes); + + QPressDeviceDirDepBot27<<< grid.grid, grid.threads >>> ( rhoBC, DD, k_Q, numberOfBCnodes, @@ -3785,7 +2880,7 @@ void QPressDevDirDepBot27( unsigned int numberOfThreads, neighborZ, size_Mat, isEvenTimestep); - getLastCudaError("QPressDeviceDirDepBot27 execution failed"); + getLastCudaError("QPressDeviceDirDepBot27 execution failed"); } ////////////////////////////////////////////////////////////////////////// void QPressNoRhoDev27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition) @@ -3804,10 +2899,33 @@ void QPressNoRhoDev27(LBMSimulationParameter* parameterDevice, QforBoundaryCondi parameterDevice->neighborY, 
parameterDevice->neighborZ, parameterDevice->numberOfNodes, - parameterDevice->isEvenTimestep); + parameterDevice->isEvenTimestep, + vf::lbm::dir::DIR_P00); getLastCudaError("QPressNoRhoDevice27 execution failed"); } ////////////////////////////////////////////////////////////////////////// +void QPressZeroRhoOutflowDev27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition) +{ /* Launcher for QPressZeroRhoOutflowDevice27: the launch grid comes from vf::cuda::getCudaGrid(numberofthreads, numberOfBCnodes); the kernel receives the boundary condition's RhoBC/k/kN arrays, the direction vf::lbm::dir::DIR_P00 and outflowPressureCorrectionFactor; afterwards getLastCudaError is called with a failure message. */ + dim3 grid = vf::cuda::getCudaGrid( parameterDevice->numberofthreads, boundaryCondition->numberOfBCnodes); + dim3 threads(parameterDevice->numberofthreads, 1, 1 ); + + QPressZeroRhoOutflowDevice27<<< grid, threads >>> ( + boundaryCondition->RhoBC, + parameterDevice->distributions.f[0], + boundaryCondition->k, + boundaryCondition->kN, + boundaryCondition->numberOfBCnodes, + parameterDevice->omega, + parameterDevice->neighborX, + parameterDevice->neighborY, + parameterDevice->neighborZ, + parameterDevice->numberOfNodes, + parameterDevice->isEvenTimestep, + vf::lbm::dir::DIR_P00, + parameterDevice->outflowPressureCorrectionFactor); + getLastCudaError("QPressZeroRhoOutflowDev27 execution failed"); +} +////////////////////////////////////////////////////////////////////////// void QInflowScaleByPressDev27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition) { dim3 grid = vf::cuda::getCudaGrid( parameterDevice->numberofthreads, boundaryCondition->numberOfBCnodes); @@ -3841,22 +2959,9 @@ void QPressDevOld27( unsigned int numberOfThreads, unsigned int size_Mat, bool isEvenTimestep) { - int Grid = (numberOfBCnodes / numberOfThreads)+1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid/Grid1)+1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 gridQ(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1 ); - - QPressDeviceOld27<<< gridQ, threads >>> ( rhoBC, + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfBCnodes); + + QPressDeviceOld27<<< grid.grid, grid.threads >>> ( rhoBC, DD, k_Q, k_N, @@ 
-3867,7 +2972,7 @@ void QPressDevOld27( unsigned int numberOfThreads, neighborZ, size_Mat, isEvenTimestep); - getLastCudaError("QPressDeviceOld27 execution failed"); + getLastCudaError("QPressDeviceOld27 execution failed"); } ////////////////////////////////////////////////////////////////////////// void QPressDevIncompNEQ27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition) @@ -3941,22 +3046,9 @@ void QPressDevZero27(unsigned int numberOfThreads, unsigned int size_Mat, bool isEvenTimestep) { - int Grid = (numberOfBCnodes / numberOfThreads)+1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid/Grid1)+1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 gridQ(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1 ); - - QPressDeviceZero27<<< gridQ, threads >>> (DD, + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfBCnodes); + + QPressDeviceZero27<<< grid.grid, grid.threads >>> (DD, k_Q, numberOfBCnodes, neighborX, @@ -3964,7 +3056,7 @@ void QPressDevZero27(unsigned int numberOfThreads, neighborZ, size_Mat, isEvenTimestep); - getLastCudaError("QPressDeviceOld27 execution failed"); + getLastCudaError("QPressDeviceOld27 execution failed"); } ////////////////////////////////////////////////////////////////////////// void QPressDevFake27( unsigned int numberOfThreads, @@ -3980,22 +3072,10 @@ void QPressDevFake27( unsigned int numberOfThreads, unsigned int size_Mat, bool isEvenTimestep) { - int Grid = (numberOfBCnodes / numberOfThreads)+1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid/Grid1)+1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 gridQ(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1 ); - - QPressDeviceFake27<<< gridQ, threads >>> (rhoBC, + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfBCnodes); + + + QPressDeviceFake27<<< grid.grid, grid.threads >>> (rhoBC, DD, k_Q, k_N, @@ -4040,22 +3120,9 @@ void QPressDev27_IntBB( unsigned 
int numberOfThreads, unsigned int size_Mat, bool isEvenTimestep) { - int Grid = (numberOfBCnodes / numberOfThreads)+1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid/Grid1)+1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 gridQ(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1 ); - - QPressDevice27_IntBB<<< gridQ, threads >>> (rho, + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfBCnodes); + + QPressDevice27_IntBB<<< grid.grid, grid.threads >>> (rho, DD, k_Q, QQ, @@ -4066,7 +3133,7 @@ void QPressDev27_IntBB( unsigned int numberOfThreads, neighborZ, size_Mat, isEvenTimestep); - getLastCudaError("QPressDevice27_IntBB execution failed"); + getLastCudaError("QPressDevice27_IntBB execution failed"); } // TODO: https://git.rz.tu-bs.de/irmb/VirtualFluids_dev/-/issues/29 ////////////////////////////////////////////////////////////////////////// @@ -4087,22 +3154,9 @@ void PressSchlaffer27(unsigned int numberOfThreads, unsigned int size_Mat, bool isEvenTimestep) { - int Grid = (numberOfBCnodes / numberOfThreads)+1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid/Grid1)+1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 gridQ(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1 ); - - PressSchlaff27<<< gridQ, threads >>>( rhoBC, + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfBCnodes); + + PressSchlaff27<<< grid.grid, grid.threads >>>( rhoBC, DD, vx0, vy0, @@ -4117,7 +3171,7 @@ void PressSchlaffer27(unsigned int numberOfThreads, neighborZ, size_Mat, isEvenTimestep); - getLastCudaError("PressSchlaff27 execution failed"); + getLastCudaError("PressSchlaff27 execution failed"); } // TODO: https://git.rz.tu-bs.de/irmb/VirtualFluids_dev/-/issues/29 ////////////////////////////////////////////////////////////////////////// @@ -4136,22 +3190,9 @@ void VelSchlaffer27( unsigned int numberOfThreads, unsigned int size_Mat, bool isEvenTimestep) { - int Grid = (numberOfBCnodes / 
numberOfThreads)+1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid/Grid1)+1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 gridQ(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1 ); - - VelSchlaff27<<< gridQ, threads >>>( t, + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfBCnodes); + + VelSchlaff27<<< grid.grid, grid.threads >>>( t, DD, vz0, deltaVz0, @@ -4167,7 +3208,71 @@ void VelSchlaffer27( unsigned int numberOfThreads, getLastCudaError("VelSchlaff27 execution failed"); } ////////////////////////////////////////////////////////////////////////// -void PropVelo( unsigned int numberOfThreads, +void QPrecursorDevCompZeroPress(LBMSimulationParameter* parameterDevice, QforPrecursorBoundaryConditions* boundaryCondition, real tRatio, real velocityRatio) +{ /* Launcher for QPrecursorDeviceCompZeroPress: the launch configuration is a vf::cuda::CudaGrid built from numberofthreads and numberOfBCnodes; the kernel receives the precursor plane neighbours/weights, the last/current buffers, the velocityX/Y/Z arrays and both tRatio and velocityRatio; afterwards getLastCudaError is called with a failure message. */ + + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(parameterDevice->numberofthreads, boundaryCondition->numberOfBCnodes); + + QPrecursorDeviceCompZeroPress<<< grid.grid, grid.threads >>>(boundaryCondition->k, boundaryCondition->numberOfBCnodes, boundaryCondition->numberOfPrecursorNodes, boundaryCondition->sizeQ, parameterDevice->omega, + parameterDevice->distributions.f[0], boundaryCondition->q27[0], + parameterDevice->neighborX, parameterDevice->neighborY, parameterDevice->neighborZ, + boundaryCondition->planeNeighborNT, boundaryCondition->planeNeighborNB, boundaryCondition->planeNeighborST, boundaryCondition->planeNeighborSB, + boundaryCondition->weightsNT, boundaryCondition->weightsNB, boundaryCondition->weightsST, boundaryCondition->weightsSB, + boundaryCondition->last, boundaryCondition->current, + boundaryCondition->velocityX, boundaryCondition->velocityY, boundaryCondition->velocityZ, + tRatio, velocityRatio, parameterDevice->numberOfNodes, parameterDevice->isEvenTimestep); + getLastCudaError("QPrecursorDeviceCompZeroPress execution failed"); + +} +////////////////////////////////////////////////////////////////////////// +void PrecursorDevEQ27( 
LBMSimulationParameter* parameterDevice, QforPrecursorBoundaryConditions* boundaryCondition, real tRatio, real velocityRatio) +{ /* Launcher for PrecursorDeviceEQ27: the launch configuration is a vf::cuda::CudaGrid built from numberofthreads and numberOfBCnodes; the three neighbour arrays are passed as neighborX, neighborY, neighborZ, matching the sibling precursor launchers. */ + + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(parameterDevice->numberofthreads, boundaryCondition->numberOfBCnodes); + + PrecursorDeviceEQ27<<< grid.grid, grid.threads >>>(boundaryCondition->k, boundaryCondition->numberOfBCnodes, boundaryCondition->numberOfPrecursorNodes, parameterDevice->omega, parameterDevice->distributions.f[0], + parameterDevice->neighborX, parameterDevice->neighborY, parameterDevice->neighborZ, + boundaryCondition->planeNeighborNT, boundaryCondition->planeNeighborNB, boundaryCondition->planeNeighborST, boundaryCondition->planeNeighborSB, + boundaryCondition->weightsNT, boundaryCondition->weightsNB, boundaryCondition->weightsST, boundaryCondition->weightsSB, + boundaryCondition->last, boundaryCondition->current, + boundaryCondition->velocityX, boundaryCondition->velocityY, boundaryCondition->velocityZ, + tRatio, velocityRatio, parameterDevice->numberOfNodes, parameterDevice->isEvenTimestep); + getLastCudaError("PrecursorDeviceEQ27 execution failed"); + +} +////////////////////////////////////////////////////////////////////////// +void PrecursorDevDistributions( LBMSimulationParameter* parameterDevice, QforPrecursorBoundaryConditions* boundaryCondition, real tRatio, real velocityRatio) +{ /* Launcher for PrecursorDeviceDistributions; only tRatio is forwarded to the kernel, velocityRatio is not part of the launch arguments. */ + + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(parameterDevice->numberofthreads, boundaryCondition->numberOfBCnodes); + + PrecursorDeviceDistributions<<< grid.grid, grid.threads >>>(boundaryCondition->k, boundaryCondition->numberOfBCnodes, boundaryCondition->numberOfPrecursorNodes, parameterDevice->distributions.f[0], + parameterDevice->neighborX, parameterDevice->neighborY, parameterDevice->neighborZ, + boundaryCondition->planeNeighborNT, boundaryCondition->planeNeighborNB, boundaryCondition->planeNeighborST, boundaryCondition->planeNeighborSB, + boundaryCondition->weightsNT, boundaryCondition->weightsNB, 
boundaryCondition->weightsST, boundaryCondition->weightsSB, + boundaryCondition->last, boundaryCondition->current, + tRatio, parameterDevice->numberOfNodes, parameterDevice->isEvenTimestep); + getLastCudaError("PrecursorDeviceDistributions execution failed"); + +} + +////////////////////////////////////////////////////////////////////////// +void QPrecursorDevDistributions( LBMSimulationParameter* parameterDevice, QforPrecursorBoundaryConditions* boundaryCondition, real tRatio, real velocityRatio) +{ /* Launcher for QPrecursorDeviceDistributions: the launch configuration is a vf::cuda::CudaGrid built from numberofthreads and numberOfBCnodes; only tRatio is forwarded to the kernel, velocityRatio is not part of the launch arguments; the failure message names the kernel launched here. */ + + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(parameterDevice->numberofthreads, boundaryCondition->numberOfBCnodes); + + QPrecursorDeviceDistributions<<< grid.grid, grid.threads >>>(boundaryCondition->k, boundaryCondition->q27[0], boundaryCondition->sizeQ, boundaryCondition->numberOfBCnodes, boundaryCondition->numberOfPrecursorNodes, parameterDevice->distributions.f[0], + parameterDevice->neighborX, parameterDevice->neighborY, parameterDevice->neighborZ, + boundaryCondition->planeNeighborNT, boundaryCondition->planeNeighborNB, boundaryCondition->planeNeighborST, boundaryCondition->planeNeighborSB, + boundaryCondition->weightsNT, boundaryCondition->weightsNB, boundaryCondition->weightsST, boundaryCondition->weightsSB, + boundaryCondition->last, boundaryCondition->current, + tRatio, parameterDevice->numberOfNodes, parameterDevice->isEvenTimestep); + getLastCudaError("QPrecursorDeviceDistributions execution failed"); + +} +////////////////////////////////////////////////////////////////////////// +extern "C" void PropVelo( unsigned int numberOfThreads, unsigned int* neighborX, unsigned int* neighborY, unsigned int* neighborZ, @@ -4182,22 +3287,9 @@ void PropVelo( unsigned int numberOfThreads, real* DD, bool EvenOrOdd) { - int Grid = (size_Prop / numberOfThreads)+1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid/Grid1)+1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 grid(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1 ); - - 
PropellerBC<<< grid, threads >>>(neighborX, + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, size_Prop); + + PropellerBC<<< grid.grid, grid.threads >>>(neighborX, neighborY, neighborZ, rho, @@ -4236,22 +3328,9 @@ void ScaleCF27( real* DC, unsigned int nyF, unsigned int numberOfThreads) { - int Grid = (kCF / numberOfThreads)+1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid/Grid1)+1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 gridINT_CF(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1 ); - - scaleCF27<<< gridINT_CF, threads >>> ( DC, + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, kCF); + + scaleCF27<<< grid.grid, grid.threads >>> ( DC, DF, neighborCX, neighborCY, @@ -4299,22 +3378,9 @@ void ScaleCFEff27(real* DC, unsigned int numberOfThreads, OffCF offCF) { - int Grid = (kCF / numberOfThreads)+1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid/Grid1)+1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 gridINT_CF(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1 ); - - scaleCFEff27<<< gridINT_CF, threads >>> ( DC, + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, kCF); + + scaleCFEff27<<< grid.grid, grid.threads >>> ( DC, DF, neighborCX, neighborCY, @@ -4363,22 +3429,9 @@ void ScaleCFLast27(real* DC, unsigned int numberOfThreads, OffCF offCF) { - int Grid = (kCF / numberOfThreads)+1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid/Grid1)+1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 gridINT_CF(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1 ); - - scaleCFLast27<<< gridINT_CF, threads >>> (DC, + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, kCF); + + scaleCFLast27<<< grid.grid, grid.threads >>> (DC, DF, neighborCX, neighborCY, @@ -4427,22 +3480,9 @@ void ScaleCFpress27( real* DC, unsigned int numberOfThreads, OffCF offCF) { - int Grid = (kCF / numberOfThreads)+1; - int Grid1, Grid2; - if (Grid>512) - 
{ - Grid1 = 512; - Grid2 = (Grid/Grid1)+1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 gridINT_CF(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1 ); - - scaleCFpress27<<< gridINT_CF, threads >>>(DC, + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, kCF); + + scaleCFpress27<<< grid.grid, grid.threads >>>(DC, DF, neighborCX, neighborCY, @@ -4491,22 +3531,9 @@ void ScaleCF_Fix_27( real* DC, unsigned int numberOfThreads, OffCF offCF) { - int Grid = (kCF / numberOfThreads)+1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid/Grid1)+1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 gridINT_CF(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1 ); - - scaleCF_Fix_27<<< gridINT_CF, threads >>>(DC, + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, kCF); + + scaleCF_Fix_27<<< grid.grid, grid.threads >>>(DC, DF, neighborCX, neighborCY, @@ -4555,22 +3582,9 @@ void ScaleCF_Fix_comp_27( real* DC, unsigned int numberOfThreads, OffCF offCF) { - int Grid = (kCF / numberOfThreads)+1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid/Grid1)+1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 gridINT_CF(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1 ); - - scaleCF_Fix_comp_27<<< gridINT_CF, threads >>>( DC, + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, kCF); + + scaleCF_Fix_comp_27<<< grid.grid, grid.threads >>>( DC, DF, neighborCX, neighborCY, @@ -4620,22 +3634,9 @@ void ScaleCF_0817_comp_27(real* DC, OffCF offCF, CUstream_st *stream) { - int Grid = (kCF / numberOfThreads)+1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid/Grid1)+1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 gridINT_CF(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1 ); - - scaleCF_0817_comp_27<<< gridINT_CF, threads, 0, stream >>>( DC, + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, kCF); + + scaleCF_0817_comp_27<<< grid.grid, grid.threads, 0, 
stream >>>( DC, DF, neighborCX, neighborCY, @@ -4685,22 +3686,9 @@ void ScaleCF_comp_D3Q27F3_2018(real* DC, unsigned int numberOfThreads, OffCF offCF) { - int Grid = (kCF / numberOfThreads)+1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid/Grid1)+1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 gridINT_CF(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1 ); - - scaleCF_comp_D3Q27F3_2018 <<< gridINT_CF, threads >>>(DC, + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, kCF); + + scaleCF_comp_D3Q27F3_2018 <<< grid.grid, grid.threads >>>(DC, DF, G6, neighborCX, @@ -4752,22 +3740,9 @@ void ScaleCF_comp_D3Q27F3(real* DC, OffCF offCF, CUstream_st *stream) { - int Grid = (kCF / numberOfThreads)+1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid/Grid1)+1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 gridINT_CF(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1 ); - - scaleCF_comp_D3Q27F3 <<< gridINT_CF, threads, 0, stream >>>( DC, + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, kCF); + + scaleCF_comp_D3Q27F3 <<< grid.grid, grid.threads, 0, stream >>>( DC, DF, G6, neighborCX, @@ -4817,22 +3792,9 @@ void ScaleCF_staggered_time_comp_27( real* DC, unsigned int numberOfThreads, OffCF offCF) { - int Grid = (kCF / numberOfThreads)+1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid/Grid1)+1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 gridINT_CF(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1 ); - - scaleCF_staggered_time_comp_27<<< gridINT_CF, threads >>>( DC, + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, kCF); + + scaleCF_staggered_time_comp_27<<< grid.grid, grid.threads >>>( DC, DF, neighborCX, neighborCY, @@ -4940,22 +3902,9 @@ void ScaleCF_RhoSq_3rdMom_comp_27(real* DC, OffCF offCF, CUstream_st *stream) { - int Grid = (kCF / numberOfThreads)+1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid/Grid1)+1; 
- } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 gridINT_CF(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1 ); - - scaleCF_RhoSq_3rdMom_comp_27<<< gridINT_CF, threads, 0, stream >>>( DC, + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, kCF); + + scaleCF_RhoSq_3rdMom_comp_27<<< grid.grid, grid.threads, 0, stream >>>( DC, DF, neighborCX, neighborCY, @@ -5005,22 +3954,9 @@ void ScaleCF_AA2016_comp_27(real* DC, OffCF offCF, CUstream_st *stream) { - int Grid = (kCF / numberOfThreads)+1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid/Grid1)+1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 gridINT_CF(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1 ); - - scaleCF_AA2016_comp_27<<< gridINT_CF, threads, 0, stream >>>(DC, + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, kCF); + + scaleCF_AA2016_comp_27<<< grid.grid, grid.threads, 0, stream >>>(DC, DF, neighborCX, neighborCY, @@ -5069,22 +4005,9 @@ void ScaleCF_NSPress_27( real* DC, unsigned int numberOfThreads, OffCF offCF) { - int Grid = (kCF / numberOfThreads)+1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid/Grid1)+1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 gridINT_CF(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1 ); - - scaleCF_NSPress_27<<< gridINT_CF, threads >>>(DC, + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, kCF); + + scaleCF_NSPress_27<<< grid.grid, grid.threads >>>(DC, DF, neighborCX, neighborCY, @@ -5130,22 +4053,9 @@ void ScaleCFThSMG7( real* DC, unsigned int numberOfThreads, OffCF offCF) { - int Grid = (kCF / numberOfThreads)+1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid/Grid1)+1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 gridINT_CF(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1 ); - - scaleCFThSMG7<<< gridINT_CF, threads >>> (DC, + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, kCF); + + scaleCFThSMG7<<< 
grid.grid, grid.threads >>> (DC, DF, DD7C, DD7F, @@ -5187,22 +4097,9 @@ void ScaleCFThS7( real* DC, real diffusivity_fine, unsigned int numberOfThreads) { - int Grid = (kCF / numberOfThreads)+1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid/Grid1)+1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 gridINT_CF(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1 ); - - scaleCFThS7<<< gridINT_CF, threads >>> ( DC, + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, kCF); + + scaleCFThS7<<< grid.grid, grid.threads >>> ( DC, DF, DD7C, DD7F, @@ -5244,22 +4141,9 @@ void ScaleCFThS27( real* DC, unsigned int numberOfThreads, OffCF offCF) { - int Grid = (kCF / numberOfThreads)+1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid/Grid1)+1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 gridINT_CF(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1 ); - - scaleCFThS27<<< gridINT_CF, threads >>> ( DC, + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, kCF); + + scaleCFThS27<<< grid.grid, grid.threads >>> ( DC, DF, DD27C, DD27F, @@ -5304,22 +4188,10 @@ void ScaleFC27( real* DC, unsigned int nyF, unsigned int numberOfThreads) { - int Grid = (kFC / numberOfThreads)+1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid/Grid1)+1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 gridINT_FC(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1 ); - - scaleFC27<<< gridINT_FC, threads >>> ( DC, + + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, kFC); + + scaleFC27<<< grid.grid, grid.threads >>> ( DC, DF, neighborCX, neighborCY, @@ -5367,22 +4239,9 @@ void ScaleFCEff27(real* DC, unsigned int numberOfThreads, OffFC offFC) { - int Grid = (kFC / numberOfThreads)+1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid/Grid1)+1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 gridINT_FC(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1 ); - - 
scaleFCEff27<<< gridINT_FC, threads >>> ( DC, + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, kFC); + + scaleFCEff27<<< grid.grid, grid.threads >>> ( DC, DF, neighborCX, neighborCY, @@ -5431,22 +4290,9 @@ void ScaleFCLast27(real* DC, unsigned int numberOfThreads, OffFC offFC) { - int Grid = (kFC / numberOfThreads)+1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid/Grid1)+1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 gridINT_FC(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1 ); - - scaleFCLast27<<< gridINT_FC, threads >>> (DC, + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, kFC); + + scaleFCLast27<<< grid.grid, grid.threads >>> (DC, DF, neighborCX, neighborCY, @@ -5495,22 +4341,9 @@ void ScaleFCpress27(real* DC, unsigned int numberOfThreads, OffFC offFC) { - int Grid = (kFC / numberOfThreads)+1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid/Grid1)+1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 gridINT_FC(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1 ); - - scaleFCpress27<<< gridINT_FC, threads >>> ( DC, + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, kFC); + + scaleFCpress27<<< grid.grid, grid.threads >>> ( DC, DF, neighborCX, neighborCY, @@ -5559,22 +4392,9 @@ void ScaleFC_Fix_27(real* DC, unsigned int numberOfThreads, OffFC offFC) { - int Grid = (kFC / numberOfThreads)+1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid/Grid1)+1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 gridINT_FC(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1 ); - - scaleFC_Fix_27<<< gridINT_FC, threads >>> ( DC, + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, kFC); + + scaleFC_Fix_27<<< grid.grid, grid.threads >>> ( DC, DF, neighborCX, neighborCY, @@ -5623,22 +4443,9 @@ void ScaleFC_Fix_comp_27( real* DC, unsigned int numberOfThreads, OffFC offFC) { - int Grid = (kFC / numberOfThreads)+1; - int Grid1, Grid2; - if 
(Grid>512) - { - Grid1 = 512; - Grid2 = (Grid/Grid1)+1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 gridINT_FC(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1 ); - - scaleFC_Fix_comp_27<<< gridINT_FC, threads >>> ( DC, + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, kFC); + + scaleFC_Fix_comp_27<<< grid.grid, grid.threads >>> ( DC, DF, neighborCX, neighborCY, @@ -5688,22 +4495,9 @@ void ScaleFC_0817_comp_27( real* DC, OffFC offFC, CUstream_st *stream) { - int Grid = (kFC / numberOfThreads)+1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid/Grid1)+1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 gridINT_FC(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1 ); - - scaleFC_0817_comp_27<<< gridINT_FC, threads, 0, stream >>> (DC, + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, kFC); + + scaleFC_0817_comp_27<<< grid.grid, grid.threads, 0, stream >>> (DC, DF, neighborCX, neighborCY, @@ -5753,22 +4547,9 @@ void ScaleFC_comp_D3Q27F3_2018( real* DC, unsigned int numberOfThreads, OffFC offFC) { - int Grid = (kFC / numberOfThreads)+1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid/Grid1)+1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 gridINT_FC(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1 ); - - scaleFC_comp_D3Q27F3_2018 <<< gridINT_FC, threads >>> (DC, + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, kFC); + + scaleFC_comp_D3Q27F3_2018 <<< grid.grid, grid.threads >>> (DC, DF, G6, neighborCX, @@ -5820,22 +4601,9 @@ void ScaleFC_comp_D3Q27F3( real* DC, OffFC offFC, CUstream_st *stream) { - int Grid = (kFC / numberOfThreads)+1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid/Grid1)+1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 gridINT_FC(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1 ); - - scaleFC_comp_D3Q27F3 <<< gridINT_FC, threads, 0, stream >>> (DC, + vf::cuda::CudaGrid grid = 
vf::cuda::CudaGrid(numberOfThreads, kFC); + + scaleFC_comp_D3Q27F3 <<< grid.grid, grid.threads, 0, stream >>> (DC, DF, G6, neighborCX, @@ -5885,22 +4653,9 @@ void ScaleFC_staggered_time_comp_27( real* DC, unsigned int numberOfThreads, OffFC offFC) { - int Grid = (kFC / numberOfThreads)+1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid/Grid1)+1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 gridINT_FC(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1 ); - - scaleFC_staggered_time_comp_27<<< gridINT_FC, threads >>> ( DC, + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, kFC); + + scaleFC_staggered_time_comp_27<<< grid.grid, grid.threads >>> ( DC, DF, neighborCX, neighborCY, @@ -6007,22 +4762,9 @@ void ScaleFC_RhoSq_3rdMom_comp_27( real* DC, OffFC offFC, CUstream_st *stream) { - int Grid = (kFC / numberOfThreads)+1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid/Grid1)+1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 gridINT_FC(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1 ); - - scaleFC_RhoSq_3rdMom_comp_27<<< gridINT_FC, threads, 0, stream >>>(DC, + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, kFC); + + scaleFC_RhoSq_3rdMom_comp_27<<< grid.grid, grid.threads, 0, stream >>>(DC, DF, neighborCX, neighborCY, @@ -6072,22 +4814,9 @@ void ScaleFC_AA2016_comp_27( real* DC, OffFC offFC, CUstream_st *stream) { - int Grid = (kFC / numberOfThreads)+1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid/Grid1)+1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 gridINT_FC(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1 ); - - scaleFC_AA2016_comp_27<<< gridINT_FC, threads, 0, stream >>>(DC, + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, kFC); + + scaleFC_AA2016_comp_27<<< grid.grid, grid.threads, 0, stream >>>(DC, DF, neighborCX, neighborCY, @@ -6136,22 +4865,9 @@ void ScaleFC_NSPress_27(real* DC, unsigned int numberOfThreads, 
OffFC offFC) { - int Grid = (kFC / numberOfThreads)+1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid/Grid1)+1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 gridINT_FC(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1 ); - - scaleFC_NSPress_27<<< gridINT_FC, threads >>> ( DC, + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, kFC); + + scaleFC_NSPress_27<<< grid.grid, grid.threads >>> ( DC, DF, neighborCX, neighborCY, @@ -6197,22 +4913,9 @@ void ScaleFCThSMG7(real* DC, unsigned int numberOfThreads, OffFC offFC) { - int Grid = (kFC / numberOfThreads)+1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid/Grid1)+1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 gridINT_FC(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1 ); - - scaleFCThSMG7<<< gridINT_FC, threads >>>( DC, + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, kFC); + + scaleFCThSMG7<<< grid.grid, grid.threads >>>( DC, DF, DD7C, DD7F, @@ -6254,22 +4957,9 @@ void ScaleFCThS7( real* DC, real diffusivity_coarse, unsigned int numberOfThreads) { - int Grid = (kFC / numberOfThreads)+1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid/Grid1)+1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 gridINT_FC(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1 ); - - scaleFCThS7<<< gridINT_FC, threads >>>(DC, + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, kFC); + + scaleFCThS7<<< grid.grid, grid.threads >>>(DC, DF, DD7C, DD7F, @@ -6311,22 +5001,9 @@ void ScaleFCThS27( real* DC, unsigned int numberOfThreads, OffFC offFC) { - int Grid = (kFC / numberOfThreads)+1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid/Grid1)+1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 gridINT_FC(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1 ); - - scaleFCThS27<<< gridINT_FC, threads >>>( DC, + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, kFC); + + 
scaleFCThS27<<< grid.grid, grid.threads >>>( DC, DF, DD27C, DD27F, @@ -6362,22 +5039,9 @@ void DragLiftPostD27(real* DD, bool isEvenTimestep, unsigned int numberOfThreads) { - int Grid = (numberOfBCnodes / numberOfThreads)+1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid/Grid1)+1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 grid(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1 ); - - DragLiftPost27<<< grid, threads >>>(DD, + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfBCnodes); + + DragLiftPost27<<< grid.grid, grid.threads >>>(DD, k_Q, QQ, numberOfBCnodes, @@ -6406,22 +5070,9 @@ void DragLiftPreD27( real* DD, bool isEvenTimestep, unsigned int numberOfThreads) { - int Grid = (numberOfBCnodes / numberOfThreads)+1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid/Grid1)+1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 grid(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1 ); - - DragLiftPre27<<< grid, threads >>>( DD, + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfBCnodes); + + DragLiftPre27<<< grid.grid, grid.threads >>>( DD, k_Q, QQ, numberOfBCnodes, @@ -6447,22 +5098,9 @@ void CalcCPtop27(real* DD, bool isEvenTimestep, unsigned int numberOfThreads) { - int Grid = (nonCp / numberOfThreads)+1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid/Grid1)+1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 grid(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1 ); - - CalcCP27<<< grid, threads >>>(DD, + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, nonCp); + + CalcCP27<<< grid.grid, grid.threads >>>(DD, cpIndex, nonCp, cpPress, @@ -6485,22 +5123,9 @@ void CalcCPbottom27( real* DD, bool isEvenTimestep, unsigned int numberOfThreads) { - int Grid = (nonCp / numberOfThreads)+1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid/Grid1)+1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 
grid(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1 ); - - CalcCP27<<< grid, threads >>>(DD, + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, nonCp); + + CalcCP27<<< grid.grid, grid.threads >>>(DD, cpIndex, nonCp, cpPress, @@ -6524,22 +5149,9 @@ void GetSendFsPreDev27(real* DD, unsigned int numberOfThreads, cudaStream_t stream) { - int Grid = (buffmax / numberOfThreads)+1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid/Grid1)+1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 grid(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1 ); - - getSendFsPre27<<< grid, threads, 0, stream >>>(DD, + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, buffmax); + + getSendFsPre27<<< grid.grid, grid.threads, 0, stream >>>(DD, bufferFs, sendIndex, buffmax, @@ -6563,22 +5175,9 @@ void GetSendFsPostDev27(real* DD, unsigned int numberOfThreads, cudaStream_t stream) { - int Grid = (buffmax / numberOfThreads)+1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid/Grid1)+1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 grid(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1 ); - - getSendFsPost27<<< grid, threads, 0, stream >>>(DD, + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, buffmax); + + getSendFsPost27<<< grid.grid, grid.threads, 0, stream >>>(DD, bufferFs, sendIndex, buffmax, @@ -6602,22 +5201,9 @@ void SetRecvFsPreDev27(real* DD, unsigned int numberOfThreads, cudaStream_t stream) { - int Grid = (buffmax / numberOfThreads)+1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid/Grid1)+1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 grid(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1 ); - - setRecvFsPre27<<< grid, threads, 0, stream >>>(DD, + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, buffmax); + + setRecvFsPre27<<< grid.grid, grid.threads, 0, stream >>>(DD, bufferFs, recvIndex, buffmax, @@ -6641,22 +5227,9 @@ void 
SetRecvFsPostDev27(real* DD, unsigned int numberOfThreads, cudaStream_t stream) { - int Grid = (buffmax / numberOfThreads)+1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid/Grid1)+1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 grid(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1 ); - - setRecvFsPost27<<< grid, threads, 0, stream >>>(DD, + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, buffmax); + + setRecvFsPost27<<< grid.grid, grid.threads, 0, stream >>>(DD, bufferFs, recvIndex, buffmax, @@ -6680,22 +5253,9 @@ void getSendGsDevF3( bool isEvenTimestep, unsigned int numberOfThreads) { - int Grid = (buffmax / numberOfThreads) + 1; - int Grid1, Grid2; - if (Grid > 512) - { - Grid1 = 512; - Grid2 = (Grid / Grid1) + 1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 grid(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1); - - getSendGsF3 <<< grid, threads >>> ( + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, buffmax); + + getSendGsF3 <<< grid.grid, grid.threads >>> ( G6, bufferGs, sendIndex, @@ -6720,22 +5280,9 @@ void setRecvGsDevF3( bool isEvenTimestep, unsigned int numberOfThreads) { - int Grid = (buffmax / numberOfThreads) + 1; - int Grid1, Grid2; - if (Grid > 512) - { - Grid1 = 512; - Grid2 = (Grid / Grid1) + 1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 grid(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1); - - setRecvGsF3 <<< grid, threads >>> ( + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, buffmax); + + setRecvGsF3 <<< grid.grid, grid.threads >>> ( G6, bufferGs, recvIndex, @@ -6763,22 +5310,9 @@ void WallFuncDev27(unsigned int numberOfThreads, unsigned int size_Mat, bool isEvenTimestep) { - int Grid = (numberOfBCnodes / numberOfThreads)+1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid/Grid1)+1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 gridQ(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1 ); - - 
WallFunction27<<< gridQ, threads >>> ( + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfBCnodes); + + WallFunction27<<< grid.grid, grid.threads >>> ( vx, vy, vz, @@ -6814,22 +5348,9 @@ void SetOutputWallVelocitySP27(unsigned int numberOfThreads, real* DD, bool isEvenTimestep) { - int Grid = (numberOfWallNodes / numberOfThreads)+1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid/Grid1)+1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 gridQ(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1 ); - - LBSetOutputWallVelocitySP27<<< gridQ, threads >>> ( vxD, + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfWallNodes); + + LBSetOutputWallVelocitySP27<<< grid.grid, grid.threads >>> ( vxD, vyD, vzD, vxWall, @@ -6862,22 +5383,9 @@ void GetVelotoForce27(unsigned int numberOfThreads, unsigned int size_Mat, bool isEvenTimestep) { - int Grid = (nonAtBC / numberOfThreads)+1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid/Grid1)+1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 gridQ(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1 ); - - GetVeloforForcing27<<< gridQ, threads >>> (DD, + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, nonAtBC); + + GetVeloforForcing27<<< grid.grid, grid.threads >>> (DD, bcIndex, nonAtBC, Vx, @@ -6911,27 +5419,14 @@ void InitParticlesDevice(real* coordX, unsigned int* neighborY, unsigned int* neighborZ, unsigned int* neighborWSB, - int level, + int level, unsigned int numberOfParticles, unsigned int size_Mat, unsigned int numberOfThreads) { - int Grid = (numberOfParticles / numberOfThreads)+1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid/Grid1)+1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 gridQ(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1 ); - - InitParticles<<< gridQ, threads >>> (coordX, + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfParticles); + + 
InitParticles<<< grid.grid, grid.threads >>> (coordX, coordY, coordZ, coordParticleXlocal, @@ -6986,22 +5481,9 @@ void MoveParticlesDevice(real* coordX, unsigned int numberOfThreads, bool isEvenTimestep) { - int Grid = (numberOfParticles / numberOfThreads)+1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid/Grid1)+1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 gridQ(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1 ); - - MoveParticles<<< gridQ, threads >>> (coordX, + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfParticles); + + MoveParticles<<< grid.grid, grid.threads >>> (coordX, coordY, coordZ, coordParticleXlocal, @@ -7035,22 +5517,8 @@ void initRandomDevice(curandState* state, unsigned int size_Mat, unsigned int numberOfThreads) { - int Grid = (size_Mat / numberOfThreads)+1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid/Grid1)+1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 gridQ(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1 ); - - initRandom<<< gridQ, threads >>> (state); + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, size_Mat); + initRandom<<< grid.grid, grid.threads >>> (state); getLastCudaError("initRandom execution failed"); } ////////////////////////////////////////////////////////////////////////// @@ -7059,22 +5527,8 @@ void generateRandomValuesDevice( curandState* state, real* randArray, unsigned int numberOfThreads) { - int Grid = (size_Mat / numberOfThreads)+1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid/Grid1)+1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 gridQ(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1 ); - - generateRandomValues<<< gridQ, threads >>> (state,randArray); + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, size_Mat); + generateRandomValues<<< grid.grid, grid.threads >>> (state,randArray); getLastCudaError("generateRandomValues execution failed"); } 
////////////////////////////////////////////////////////////////////////// @@ -7097,22 +5551,8 @@ void CalcTurbulenceIntensityDevice( bool isEvenTimestep, uint numberOfThreads) { - int Grid = (size_Mat / numberOfThreads)+1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid/Grid1)+1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 gridQ(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1 ); - - CalcTurbulenceIntensity<<<gridQ, threads>>>( + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, size_Mat); + CalcTurbulenceIntensity<<<grid.grid, grid.threads>>>( vxx, vyy, vzz, diff --git a/src/gpu/VirtualFluids_GPU/GPU/PrecursorBCs27.cu b/src/gpu/VirtualFluids_GPU/GPU/PrecursorBCs27.cu new file mode 100644 index 0000000000000000000000000000000000000000..a0daa5c229aabac360a71ae0f538f74124e3e963 --- /dev/null +++ b/src/gpu/VirtualFluids_GPU/GPU/PrecursorBCs27.cu @@ -0,0 +1,966 @@ +#include "LBM/LB.h" +#include <lbm/constants/NumericConstants.h> +#include <lbm/constants/D3Q27.h> +#include <lbm/MacroscopicQuantities.h> + +#include "VirtualFluids_GPU/Kernel/Utilities/DistributionHelper.cuh" +#include "VirtualFluids_GPU/GPU/KernelUtilities.h" + +using namespace vf::lbm::constant; +using namespace vf::lbm::dir; + +__global__ void QPrecursorDeviceCompZeroPress( int* subgridDistanceIndices, + int numberOfBCnodes, + int numberOfPrecursorNodes, + int sizeQ, + real omega, + real* distributions, + real* subgridDistances, + uint* neighborX, + uint* neighborY, + uint* neighborZ, + uint* neighborsNT, + uint* neighborsNB, + uint* neighborsST, + uint* neighborsSB, + real* weightsNT, + real* weightsNB, + real* weightsST, + real* weightsSB, + real* vLast, + real* vCurrent, + real velocityX, + real velocityY, + real velocityZ, + real tRatio, + real velocityRatio, + unsigned long long numberOfLBnodes, + bool isEvenTimestep) +{ + const unsigned k = vf::gpu::getNodeIndex(); + + if(k>=numberOfBCnodes) return; + + 
//////////////////////////////////////////////////////////////////////////////// + // interpolation of velocity + real vxLastInterpd, vyLastInterpd, vzLastInterpd; + real vxNextInterpd, vyNextInterpd, vzNextInterpd; + + uint kNT = neighborsNT[k]; + real dNT = weightsNT[k]; + + real* vxLast = vLast; + real* vyLast = &vLast[numberOfPrecursorNodes]; + real* vzLast = &vLast[2*numberOfPrecursorNodes]; + + real* vxCurrent = vCurrent; + real* vyCurrent = &vCurrent[numberOfPrecursorNodes]; + real* vzCurrent = &vCurrent[2*numberOfPrecursorNodes]; + + if(dNT < 1e6) + { + uint kNB = neighborsNB[k]; + uint kST = neighborsST[k]; + uint kSB = neighborsSB[k]; + + real dNB = weightsNB[k]; + real dST = weightsST[k]; + real dSB = weightsSB[k]; + + real invWeightSum = 1.f/(dNT+dNB+dST+dSB); + + vxLastInterpd = (vxLast[kNT]*dNT + vxLast[kNB]*dNB + vxLast[kST]*dST + vxLast[kSB]*dSB)*invWeightSum; + vyLastInterpd = (vyLast[kNT]*dNT + vyLast[kNB]*dNB + vyLast[kST]*dST + vyLast[kSB]*dSB)*invWeightSum; + vzLastInterpd = (vzLast[kNT]*dNT + vzLast[kNB]*dNB + vzLast[kST]*dST + vzLast[kSB]*dSB)*invWeightSum; + + vxNextInterpd = (vxCurrent[kNT]*dNT + vxCurrent[kNB]*dNB + vxCurrent[kST]*dST + vxCurrent[kSB]*dSB)*invWeightSum; + vyNextInterpd = (vyCurrent[kNT]*dNT + vyCurrent[kNB]*dNB + vyCurrent[kST]*dST + vyCurrent[kSB]*dSB)*invWeightSum; + vzNextInterpd = (vzCurrent[kNT]*dNT + vzCurrent[kNB]*dNB + vzCurrent[kST]*dST + vzCurrent[kSB]*dSB)*invWeightSum; + } + else + { + vxLastInterpd = vxLast[kNT]; + vyLastInterpd = vyLast[kNT]; + vzLastInterpd = vzLast[kNT]; + + vxNextInterpd = vxCurrent[kNT]; + vyNextInterpd = vyCurrent[kNT]; + vzNextInterpd = vzCurrent[kNT]; + } + + // if(k==16300)s printf("%f %f %f\n", vxLastInterpd, vyLastInterpd, vzLastInterpd); + real VeloX = (velocityX + (1.f-tRatio)*vxLastInterpd + tRatio*vxNextInterpd)/velocityRatio; + real VeloY = (velocityY + (1.f-tRatio)*vyLastInterpd + tRatio*vyNextInterpd)/velocityRatio; + real VeloZ = (velocityZ + (1.f-tRatio)*vzLastInterpd + 
tRatio*vzNextInterpd)/velocityRatio; + // From here on just a copy of QVelDeviceCompZeroPress + //////////////////////////////////////////////////////////////////////////////// + + Distributions27 dist; + getPointersToDistributions(dist, distributions, numberOfLBnodes, isEvenTimestep); + + unsigned int KQK = subgridDistanceIndices[k]; + unsigned int kzero= KQK; + unsigned int ke = KQK; + unsigned int kw = neighborX[KQK]; + unsigned int kn = KQK; + unsigned int ks = neighborY[KQK]; + unsigned int kt = KQK; + unsigned int kb = neighborZ[KQK]; + unsigned int ksw = neighborY[kw]; + unsigned int kne = KQK; + unsigned int kse = ks; + unsigned int knw = kw; + unsigned int kbw = neighborZ[kw]; + unsigned int kte = KQK; + unsigned int kbe = kb; + unsigned int ktw = kw; + unsigned int kbs = neighborZ[ks]; + unsigned int ktn = KQK; + unsigned int kbn = kb; + unsigned int kts = ks; + unsigned int ktse = ks; + unsigned int kbnw = kbw; + unsigned int ktnw = kw; + unsigned int kbse = kbs; + unsigned int ktsw = ksw; + unsigned int kbne = kb; + unsigned int ktne = KQK; + unsigned int kbsw = neighborZ[ksw]; + + //////////////////////////////////////////////////////////////////////////////// + //! - Set local distributions + //! 
+ real f_W = (dist.f[DIR_P00 ])[ke ]; + real f_E = (dist.f[DIR_M00 ])[kw ]; + real f_S = (dist.f[DIR_0P0 ])[kn ]; + real f_N = (dist.f[DIR_0M0 ])[ks ]; + real f_B = (dist.f[DIR_00P ])[kt ]; + real f_T = (dist.f[DIR_00M ])[kb ]; + real f_SW = (dist.f[DIR_PP0 ])[kne ]; + real f_NE = (dist.f[DIR_MM0 ])[ksw ]; + real f_NW = (dist.f[DIR_PM0 ])[kse ]; + real f_SE = (dist.f[DIR_MP0 ])[knw ]; + real f_BW = (dist.f[DIR_P0P ])[kte ]; + real f_TE = (dist.f[DIR_M0M ])[kbw ]; + real f_TW = (dist.f[DIR_P0M ])[kbe ]; + real f_BE = (dist.f[DIR_M0P ])[ktw ]; + real f_BS = (dist.f[DIR_0PP ])[ktn ]; + real f_TN = (dist.f[DIR_0MM ])[kbs ]; + real f_TS = (dist.f[DIR_0PM ])[kbn ]; + real f_BN = (dist.f[DIR_0MP ])[kts ]; + real f_BSW = (dist.f[DIR_PPP ])[ktne ]; + real f_BNE = (dist.f[DIR_MMP ])[ktsw ]; + real f_BNW = (dist.f[DIR_PMP ])[ktse ]; + real f_BSE = (dist.f[DIR_MPP ])[ktnw ]; + real f_TSW = (dist.f[DIR_PPM ])[kbne ]; + real f_TNE = (dist.f[DIR_MMM ])[kbsw ]; + real f_TNW = (dist.f[DIR_PMM ])[kbse ]; + real f_TSE = (dist.f[DIR_MPM ])[kbnw ]; + + SubgridDistances27 subgridD; + getPointersToSubgridDistances(subgridD, subgridDistances, numberOfBCnodes); + + //////////////////////////////////////////////////////////////////////////////// + real drho = f_TSE + f_TNW + f_TNE + f_TSW + f_BSE + f_BNW + f_BNE + f_BSW + + f_BN + f_TS + f_TN + f_BS + f_BE + f_TW + f_TE + f_BW + f_SE + f_NW + f_NE + f_SW + + f_T + f_B + f_N + f_S + f_E + f_W + ((dist.f[DIR_000])[kzero]); + + real vx1 = (((f_TSE - f_BNW) - (f_TNW - f_BSE)) + ((f_TNE - f_BSW) - (f_TSW - f_BNE)) + + ((f_BE - f_TW) + (f_TE - f_BW)) + ((f_SE - f_NW) + (f_NE - f_SW)) + + (f_E - f_W)) / (c1o1 + drho); + + + real vx2 = ((-(f_TSE - f_BNW) + (f_TNW - f_BSE)) + ((f_TNE - f_BSW) - (f_TSW - f_BNE)) + + ((f_BN - f_TS) + (f_TN - f_BS)) + (-(f_SE - f_NW) + (f_NE - f_SW)) + + (f_N - f_S)) / (c1o1 + drho); + + real vx3 = (((f_TSE - f_BNW) + (f_TNW - f_BSE)) + ((f_TNE - f_BSW) + (f_TSW - f_BNE)) + + (-(f_BN - f_TS) + (f_TN - f_BS)) + ((f_TE - 
f_BW) - (f_BE - f_TW)) + + (f_T - f_B)) / (c1o1 + drho); + + + // if(k==16383 || k==0) printf("k %d kQ %d drho = %f u %f v %f w %f\n",k, KQK, drho, vx1, vx2, vx3); + real cu_sq=c3o2*(vx1*vx1+vx2*vx2+vx3*vx3) * (c1o1 + drho); + ////////////////////////////////////////////////////////////////////////// + + + //////////////////////////////////////////////////////////////////////////////// + //! - Update distributions with subgrid distance (q) between zero and one + real feq, q, velocityLB, velocityBC; + q = (subgridD.q[DIR_P00])[k]; + if (q>=c0o1 && q<=c1o1) // only update distribution for q between zero and one + { + velocityLB = vx1; + feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c2o27); + velocityBC = VeloX; + (dist.f[DIR_M00])[kw] = getInterpolatedDistributionForVeloWithPressureBC(q, f_E, f_W, feq, omega, drho, velocityBC, c2o27); + } + + q = (subgridD.q[DIR_M00])[k]; + if (q>=c0o1 && q<=c1o1) + { + velocityLB = -vx1; + feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c2o27); + velocityBC = -VeloX; + (dist.f[DIR_P00])[ke] = getInterpolatedDistributionForVeloWithPressureBC(q, f_W, f_E, feq, omega, drho, velocityBC, c2o27); + } + + q = (subgridD.q[DIR_0P0])[k]; + if (q>=c0o1 && q<=c1o1) + { + velocityLB = vx2; + feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c2o27); + velocityBC = VeloY; + (dist.f[DIR_0M0])[ks] = getInterpolatedDistributionForVeloWithPressureBC(q, f_N, f_S, feq, omega, drho, velocityBC, c2o27); // index ks: the node f_N was read from (was the direction constant DIR_0M0) + } + + q = (subgridD.q[DIR_0M0])[k]; + if (q>=c0o1 && q<=c1o1) + { + velocityLB = -vx2; + feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c2o27); + velocityBC = -VeloY; + (dist.f[DIR_0P0])[kn] = getInterpolatedDistributionForVeloWithPressureBC(q, f_S, f_N, feq, omega, drho, velocityBC, c2o27); + } + + q = (subgridD.q[DIR_00P])[k]; + if (q>=c0o1 && q<=c1o1) + { + velocityLB = vx3; + feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c2o27); + velocityBC = VeloZ; + (dist.f[DIR_00M])[kb] = getInterpolatedDistributionForVeloWithPressureBC(q,
f_T, f_B, feq, omega, drho, velocityBC, c2o27); + } + + q = (subgridD.q[DIR_00M])[k]; + if (q>=c0o1 && q<=c1o1) + { + velocityLB = -vx3; + feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c2o27); + velocityBC = -VeloZ; + (dist.f[DIR_00P])[kt] = getInterpolatedDistributionForVeloWithPressureBC(q, f_B, f_T, feq, omega, drho, velocityBC, c2o27); + } + + q = (subgridD.q[DIR_PP0])[k]; + if (q>=c0o1 && q<=c1o1) + { + velocityLB = vx1 + vx2; + feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o54); + velocityBC = VeloX + VeloY; + (dist.f[DIR_MM0])[ksw] = getInterpolatedDistributionForVeloWithPressureBC(q, f_NE, f_SW, feq, omega, drho, velocityBC, c1o54); + } + + q = (subgridD.q[DIR_MM0])[k]; + if (q>=c0o1 && q<=c1o1) + { + velocityLB = -vx1 - vx2; + feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o54); + velocityBC = -VeloX - VeloY; + (dist.f[DIR_PP0])[kne] = getInterpolatedDistributionForVeloWithPressureBC(q, f_SW, f_NE, feq, omega, drho, velocityBC, c1o54); + } + + q = (subgridD.q[DIR_PM0])[k]; + if (q>=c0o1 && q<=c1o1) + { + velocityLB = vx1 - vx2; + feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o54); + velocityBC = VeloX - VeloY; + (dist.f[DIR_MP0])[knw] = getInterpolatedDistributionForVeloWithPressureBC(q, f_SE, f_NW, feq, omega, drho, velocityBC, c1o54); + } + + q = (subgridD.q[DIR_MP0])[k]; + if (q>=c0o1 && q<=c1o1) + { + velocityLB = -vx1 + vx2; + feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o54); + velocityBC = -VeloX + VeloY; + (dist.f[DIR_PM0])[kse] = getInterpolatedDistributionForVeloWithPressureBC(q, f_NW, f_SE, feq, omega, drho, velocityBC, c1o54); + } + + q = (subgridD.q[DIR_P0P])[k]; + if (q>=c0o1 && q<=c1o1) + { + velocityLB = vx1 + vx3; + feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o54); + velocityBC = VeloX + VeloZ; + (dist.f[DIR_M0M])[kbw] = getInterpolatedDistributionForVeloWithPressureBC(q, f_TE, f_BW, feq, omega, drho, velocityBC, c1o54); + } + + q = (subgridD.q[DIR_M0M])[k]; + if (q>=c0o1 && q<=c1o1) + { + 
velocityLB = -vx1 - vx3; + feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o54); + velocityBC = -VeloX - VeloZ; + (dist.f[DIR_P0P])[kte] = getInterpolatedDistributionForVeloWithPressureBC(q, f_BW, f_TE, feq, omega, drho, velocityBC, c1o54); + } + + q = (subgridD.q[DIR_P0M])[k]; + if (q>=c0o1 && q<=c1o1) + { + velocityLB = vx1 - vx3; + feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o54); + velocityBC = VeloX - VeloZ; + (dist.f[DIR_M0P])[ktw] = getInterpolatedDistributionForVeloWithPressureBC(q, f_BE, f_TW, feq, omega, drho, velocityBC, c1o54); + } + + q = (subgridD.q[DIR_M0P])[k]; + if (q>=c0o1 && q<=c1o1) + { + velocityLB = -vx1 + vx3; + feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o54); + velocityBC = -VeloX + VeloZ; + (dist.f[DIR_P0M])[kbe] = getInterpolatedDistributionForVeloWithPressureBC(q, f_TW, f_BE, feq, omega, drho, velocityBC, c1o54); + } + + q = (subgridD.q[DIR_0PP])[k]; + if (q>=c0o1 && q<=c1o1) + { + velocityLB = vx2 + vx3; + feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o54); + velocityBC = VeloY + VeloZ; + (dist.f[DIR_0MM])[kbs] = getInterpolatedDistributionForVeloWithPressureBC(q, f_TN, f_BS, feq, omega, drho, velocityBC, c1o54); + } + + q = (subgridD.q[DIR_0MM])[k]; + if (q>=c0o1 && q<=c1o1) + { + velocityLB = -vx2 - vx3; + feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o54); + velocityBC = -VeloY - VeloZ; + (dist.f[DIR_0PP])[ktn] = getInterpolatedDistributionForVeloWithPressureBC(q, f_BS, f_TN, feq, omega, drho, velocityBC, c1o54); + } + + q = (subgridD.q[DIR_0PM])[k]; + if (q>=c0o1 && q<=c1o1) + { + velocityLB = vx2 - vx3; + feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o54); + velocityBC = VeloY - VeloZ; + (dist.f[DIR_0MP])[kts] = getInterpolatedDistributionForVeloWithPressureBC(q, f_BN, f_TS, feq, omega, drho, velocityBC, c1o54); + } + + q = (subgridD.q[DIR_0MP])[k]; + if (q>=c0o1 && q<=c1o1) + { + velocityLB = -vx2 + vx3; + feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o54); + velocityBC = -VeloY + 
VeloZ; + (dist.f[DIR_0PM])[kbn] = getInterpolatedDistributionForVeloWithPressureBC(q, f_TS, f_BN, feq, omega, drho, velocityBC, c1o54); + } + + q = (subgridD.q[DIR_PPP])[k]; + if (q>=c0o1 && q<=c1o1) + { + velocityLB = vx1 + vx2 + vx3; + feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o216); + velocityBC = VeloX + VeloY + VeloZ; + (dist.f[DIR_MMM])[kbsw] = getInterpolatedDistributionForVeloWithPressureBC(q, f_TNE, f_BSW, feq, omega, drho, velocityBC, c1o216); + } + + q = (subgridD.q[DIR_MMM])[k]; + if (q>=c0o1 && q<=c1o1) + { + velocityLB = -vx1 - vx2 - vx3; + feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o216); + velocityBC = -VeloX - VeloY - VeloZ; + (dist.f[DIR_PPP])[ktne] = getInterpolatedDistributionForVeloWithPressureBC(q, f_BSW, f_TNE, feq, omega, drho, velocityBC, c1o216); + } + + q = (subgridD.q[DIR_PPM])[k]; + if (q>=c0o1 && q<=c1o1) + { + velocityLB = vx1 + vx2 - vx3; + feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o216); + velocityBC = VeloX + VeloY - VeloZ; + (dist.f[DIR_MMP])[ktsw] = getInterpolatedDistributionForVeloWithPressureBC(q, f_BNE, f_TSW, feq, omega, drho, velocityBC, c1o216); + } + + q = (subgridD.q[DIR_MMP])[k]; + if (q>=c0o1 && q<=c1o1) + { + velocityLB = -vx1 - vx2 + vx3; + feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o216); + velocityBC = -VeloX - VeloY + VeloZ; + (dist.f[DIR_PPM])[kbne] = getInterpolatedDistributionForVeloWithPressureBC(q, f_TSW, f_BNE, feq, omega, drho, velocityBC, c1o216); + } + + q = (subgridD.q[DIR_PMP])[k]; + if (q>=c0o1 && q<=c1o1) + { + velocityLB = vx1 - vx2 + vx3; + feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o216); + velocityBC = VeloX - VeloY + VeloZ; + (dist.f[DIR_MPM])[kbnw] = getInterpolatedDistributionForVeloWithPressureBC(q, f_TSE, f_BNW, feq, omega, drho, velocityBC, c1o216); + } + + q = (subgridD.q[DIR_MPM])[k]; + if (q>=c0o1 && q<=c1o1) + { + velocityLB = -vx1 + vx2 - vx3; + feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o216); + velocityBC = -VeloX + VeloY - 
VeloZ;
+        (dist.f[DIR_PMP])[ktse] = getInterpolatedDistributionForVeloWithPressureBC(q, f_BNW, f_TSE, feq, omega, drho, velocityBC, c1o216);
+    }
+
+    q = (subgridD.q[DIR_PMM])[k];
+    if (q>=c0o1 && q<=c1o1)
+    {
+        velocityLB = vx1 - vx2 - vx3;
+        feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o216);
+        velocityBC = VeloX - VeloY - VeloZ;
+        (dist.f[DIR_MPP])[ktnw] = getInterpolatedDistributionForVeloWithPressureBC(q, f_BSE, f_TNW, feq, omega, drho, velocityBC, c1o216);
+    }
+
+    q = (subgridD.q[DIR_MPP])[k];
+    if (q>=c0o1 && q<=c1o1)
+    {
+        velocityLB = -vx1 + vx2 + vx3;
+        feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o216);
+        velocityBC = -VeloX + VeloY + VeloZ;
+        (dist.f[DIR_PMM])[kbse] = getInterpolatedDistributionForVeloWithPressureBC(q, f_TNW, f_BSE, feq, omega, drho, velocityBC, c1o216);
+    }
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+
+//! \brief Precursor boundary condition that overwrites all 27 populations of a boundary node with equilibrium values.
+//!
+//! One thread handles one boundary node k; threads with k >= numberOfBCnodes return immediately.
+//! - The precursor velocity is interpolated in space from the precursor nodes NT, NB, ST and SB with the
+//!   given weights, and in time between \p vLast and \p vCurrent using \p tRatio.
+//! - (velocityX, velocityY, velocityZ) is added and the sum is divided by \p velocityRatio.
+//! - All 27 populations belonging to node subgridDistanceIndices[k] are set to the equilibrium of that
+//!   velocity with drho = 0.
+//!
+//! \p vLast and \p vCurrent each hold three consecutive component arrays (x, y, z) with
+//! \p numberOfPrecursorNodes entries per component.
+//! \p omega is not used by this kernel; it is kept so the launch interface stays unchanged.
+__global__ void PrecursorDeviceEQ27( int* subgridDistanceIndices,
+                                     int numberOfBCnodes,
+                                     int numberOfPrecursorNodes,
+                                     real omega,
+                                     real* distributions,
+                                     uint* neighborX,
+                                     uint* neighborY,
+                                     uint* neighborZ,
+                                     uint* neighborsNT,
+                                     uint* neighborsNB,
+                                     uint* neighborsST,
+                                     uint* neighborsSB,
+                                     real* weightsNT,
+                                     real* weightsNB,
+                                     real* weightsST,
+                                     real* weightsSB,
+                                     real* vLast,
+                                     real* vCurrent,
+                                     real velocityX,
+                                     real velocityY,
+                                     real velocityZ,
+                                     real tRatio,
+                                     real velocityRatio,
+                                     unsigned long long numberOfLBnodes,
+                                     bool isEvenTimestep)
+{
+    const unsigned k = vf::gpu::getNodeIndex();
+
+    if(k>=numberOfBCnodes) return;
+
+    ////////////////////////////////////////////////////////////////////////////////
+    //! - Interpolate the precursor velocity in space
+    real vxLastInterpd, vyLastInterpd, vzLastInterpd;
+    real vxNextInterpd, vyNextInterpd, vzNextInterpd;
+
+    uint kNT = neighborsNT[k];
+    real dNT = weightsNT[k];
+
+    // component arrays are stored back to back: [x | y | z]
+    real* vxLast = vLast;
+    real* vyLast = &vLast[numberOfPrecursorNodes];
+    real* vzLast = &vLast[2*numberOfPrecursorNodes];
+
+    real* vxCurrent = vCurrent;
+    real* vyCurrent = &vCurrent[numberOfPrecursorNodes];
+    real* vzCurrent = &vCurrent[2*numberOfPrecursorNodes];
+
+    // NOTE(review): a weight >= 1e6 presumably marks a boundary node that coincides with precursor
+    // node NT, in which case only that node is used - confirm against the code that fills the weights.
+    if(dNT < 1e6)
+    {
+        uint kNB = neighborsNB[k];
+        uint kST = neighborsST[k];
+        uint kSB = neighborsSB[k];
+
+        real dNB = weightsNB[k];
+        real dST = weightsST[k];
+        real dSB = weightsSB[k];
+
+        real invWeightSum = 1.f/(dNT+dNB+dST+dSB);
+
+        vxLastInterpd = (vxLast[kNT]*dNT + vxLast[kNB]*dNB + vxLast[kST]*dST + vxLast[kSB]*dSB)*invWeightSum;
+        vyLastInterpd = (vyLast[kNT]*dNT + vyLast[kNB]*dNB + vyLast[kST]*dST + vyLast[kSB]*dSB)*invWeightSum;
+        vzLastInterpd = (vzLast[kNT]*dNT + vzLast[kNB]*dNB + vzLast[kST]*dST + vzLast[kSB]*dSB)*invWeightSum;
+
+        vxNextInterpd = (vxCurrent[kNT]*dNT + vxCurrent[kNB]*dNB + vxCurrent[kST]*dST + vxCurrent[kSB]*dSB)*invWeightSum;
+        vyNextInterpd = (vyCurrent[kNT]*dNT + vyCurrent[kNB]*dNB + vyCurrent[kST]*dST + vyCurrent[kSB]*dSB)*invWeightSum;
+        vzNextInterpd = (vzCurrent[kNT]*dNT + vzCurrent[kNB]*dNB + vzCurrent[kST]*dST + vzCurrent[kSB]*dSB)*invWeightSum;
+    }
+    else
+    {
+        vxLastInterpd = vxLast[kNT];
+        vyLastInterpd = vyLast[kNT];
+        vzLastInterpd = vzLast[kNT];
+
+        vxNextInterpd = vxCurrent[kNT];
+        vyNextInterpd = vyCurrent[kNT];
+        vzNextInterpd = vzCurrent[kNT];
+    }
+
+    ////////////////////////////////////////////////////////////////////////////////
+    //! - Interpolate in time, add the constant velocity and rescale
+    // if(k==16300) printf("%f %f %f\n", vxLastInterpd, vyLastInterpd, vzLastInterpd);
+    real VeloX = (velocityX + (1.f-tRatio)*vxLastInterpd + tRatio*vxNextInterpd)/velocityRatio;
+    real VeloY = (velocityY + (1.f-tRatio)*vyLastInterpd + tRatio*vyNextInterpd)/velocityRatio;
+    real VeloZ = (velocityZ + (1.f-tRatio)*vzLastInterpd + tRatio*vzNextInterpd)/velocityRatio;
+
+    ////////////////////////////////////////////////////////////////////////////////
+    //! - Get pointers to the distributions and the node indices of all 27 populations
+    Distributions27 dist;
+    getPointersToDistributions(dist, distributions, numberOfLBnodes, isEvenTimestep);
+
+    unsigned int KQK  = subgridDistanceIndices[k];
+    unsigned int kzero= KQK;
+    unsigned int ke   = KQK;
+    unsigned int kw   = neighborX[KQK];
+    unsigned int kn   = KQK;
+    unsigned int ks   = neighborY[KQK];
+    unsigned int kt   = KQK;
+    unsigned int kb   = neighborZ[KQK];
+    unsigned int ksw  = neighborY[kw];
+    unsigned int kne  = KQK;
+    unsigned int kse  = ks;
+    unsigned int knw  = kw;
+    unsigned int kbw  = neighborZ[kw];
+    unsigned int kte  = KQK;
+    unsigned int kbe  = kb;
+    unsigned int ktw  = kw;
+    unsigned int kbs  = neighborZ[ks];
+    unsigned int ktn  = KQK;
+    unsigned int kbn  = kb;
+    unsigned int kts  = ks;
+    unsigned int ktse = ks;
+    unsigned int kbnw = kbw;
+    unsigned int ktnw = kw;
+    unsigned int kbse = kbs;
+    unsigned int ktsw = ksw;
+    unsigned int kbne = kb;
+    unsigned int ktne = KQK;
+    unsigned int kbsw = neighborZ[ksw];
+
+    ////////////////////////////////////////////////////////////////////////////////
+    //! - Set macroscopic quantities
+    //!
+    real drho = c0o1;
+    real vx1  = VeloX;
+    real vx2  = VeloY;
+    real vx3  = VeloZ;
+
+    real cusq = c3o2 * (vx1 * vx1 + vx2 * vx2 + vx3 * vx3);
+
+    ////////////////////////////////////////////////////////////////////////////////
+    //! - Compute the equilibrium populations
+    // The former loads of f_* from global memory were removed: every value was overwritten
+    // here before it was ever read.
+    const real f_ZERO = c8o27* (drho-(drho+c1o1)*cusq);
+    const real f_E    = c2o27* (drho+(drho+c1o1)*(c3o1*( vx1        )+c9o2*( vx1        )*( vx1        )-cusq));
+    const real f_W    = c2o27* (drho+(drho+c1o1)*(c3o1*(-vx1        )+c9o2*(-vx1        )*(-vx1        )-cusq));
+    const real f_N    = c2o27* (drho+(drho+c1o1)*(c3o1*(     vx2    )+c9o2*(     vx2    )*(     vx2    )-cusq));
+    const real f_S    = c2o27* (drho+(drho+c1o1)*(c3o1*(    -vx2    )+c9o2*(    -vx2    )*(    -vx2    )-cusq));
+    const real f_T    = c2o27* (drho+(drho+c1o1)*(c3o1*(         vx3)+c9o2*(         vx3)*(         vx3)-cusq));
+    const real f_B    = c2o27* (drho+(drho+c1o1)*(c3o1*(        -vx3)+c9o2*(        -vx3)*(        -vx3)-cusq));
+    const real f_NE   = c1o54* (drho+(drho+c1o1)*(c3o1*( vx1+vx2    )+c9o2*( vx1+vx2    )*( vx1+vx2    )-cusq));
+    const real f_SW   = c1o54* (drho+(drho+c1o1)*(c3o1*(-vx1-vx2    )+c9o2*(-vx1-vx2    )*(-vx1-vx2    )-cusq));
+    const real f_SE   = c1o54* (drho+(drho+c1o1)*(c3o1*( vx1-vx2    )+c9o2*( vx1-vx2    )*( vx1-vx2    )-cusq));
+    const real f_NW   = c1o54* (drho+(drho+c1o1)*(c3o1*(-vx1+vx2    )+c9o2*(-vx1+vx2    )*(-vx1+vx2    )-cusq));
+    const real f_TE   = c1o54* (drho+(drho+c1o1)*(c3o1*( vx1    +vx3)+c9o2*( vx1    +vx3)*( vx1    +vx3)-cusq));
+    const real f_BW   = c1o54* (drho+(drho+c1o1)*(c3o1*(-vx1    -vx3)+c9o2*(-vx1    -vx3)*(-vx1    -vx3)-cusq));
+    const real f_BE   = c1o54* (drho+(drho+c1o1)*(c3o1*( vx1    -vx3)+c9o2*( vx1    -vx3)*( vx1    -vx3)-cusq));
+    const real f_TW   = c1o54* (drho+(drho+c1o1)*(c3o1*(-vx1    +vx3)+c9o2*(-vx1    +vx3)*(-vx1    +vx3)-cusq));
+    const real f_TN   = c1o54* (drho+(drho+c1o1)*(c3o1*(     vx2+vx3)+c9o2*(     vx2+vx3)*(     vx2+vx3)-cusq));
+    const real f_BS   = c1o54* (drho+(drho+c1o1)*(c3o1*(    -vx2-vx3)+c9o2*(    -vx2-vx3)*(    -vx2-vx3)-cusq));
+    const real f_BN   = c1o54* (drho+(drho+c1o1)*(c3o1*(     vx2-vx3)+c9o2*(     vx2-vx3)*(     vx2-vx3)-cusq));
+    const real f_TS   = c1o54* (drho+(drho+c1o1)*(c3o1*(    -vx2+vx3)+c9o2*(    -vx2+vx3)*(    -vx2+vx3)-cusq));
+    const real f_TNE  = c1o216*(drho+(drho+c1o1)*(c3o1*( vx1+vx2+vx3)+c9o2*( vx1+vx2+vx3)*( vx1+vx2+vx3)-cusq));
+    const real f_BSW  = c1o216*(drho+(drho+c1o1)*(c3o1*(-vx1-vx2-vx3)+c9o2*(-vx1-vx2-vx3)*(-vx1-vx2-vx3)-cusq));
+    const real f_BNE  = c1o216*(drho+(drho+c1o1)*(c3o1*( vx1+vx2-vx3)+c9o2*( vx1+vx2-vx3)*( vx1+vx2-vx3)-cusq));
+    const real f_TSW  = c1o216*(drho+(drho+c1o1)*(c3o1*(-vx1-vx2+vx3)+c9o2*(-vx1-vx2+vx3)*(-vx1-vx2+vx3)-cusq));
+    const real f_TSE  = c1o216*(drho+(drho+c1o1)*(c3o1*( vx1-vx2+vx3)+c9o2*( vx1-vx2+vx3)*( vx1-vx2+vx3)-cusq));
+    const real f_BNW  = c1o216*(drho+(drho+c1o1)*(c3o1*(-vx1+vx2-vx3)+c9o2*(-vx1+vx2-vx3)*(-vx1+vx2-vx3)-cusq));
+    const real f_BSE  = c1o216*(drho+(drho+c1o1)*(c3o1*( vx1-vx2-vx3)+c9o2*( vx1-vx2-vx3)*( vx1-vx2-vx3)-cusq));
+    const real f_TNW  = c1o216*(drho+(drho+c1o1)*(c3o1*(-vx1+vx2+vx3)+c9o2*(-vx1+vx2+vx3)*(-vx1+vx2+vx3)-cusq));
+
+    ////////////////////////////////////////////////////////////////////////////////
+    //! - Write the new distributions to the bc nodes
+    //!   (each slot receives the value named after the opposite direction)
+    (dist.f[DIR_P00])[ke   ] = f_W   ;
+    (dist.f[DIR_M00])[kw   ] = f_E   ;
+    (dist.f[DIR_0P0])[kn   ] = f_S   ;
+    (dist.f[DIR_0M0])[ks   ] = f_N   ;
+    (dist.f[DIR_00P])[kt   ] = f_B   ;
+    (dist.f[DIR_00M])[kb   ] = f_T   ;
+    (dist.f[DIR_PP0])[kne  ] = f_SW  ;
+    (dist.f[DIR_MM0])[ksw  ] = f_NE  ;
+    (dist.f[DIR_PM0])[kse  ] = f_NW  ;
+    (dist.f[DIR_MP0])[knw  ] = f_SE  ;
+    (dist.f[DIR_P0P])[kte  ] = f_BW  ;
+    (dist.f[DIR_M0M])[kbw  ] = f_TE  ;
+    (dist.f[DIR_P0M])[kbe  ] = f_TW  ;
+    (dist.f[DIR_M0P])[ktw  ] = f_BE  ;
+    (dist.f[DIR_0PP])[ktn  ] = f_BS  ;
+    (dist.f[DIR_0MM])[kbs  ] = f_TN  ;
+    (dist.f[DIR_0PM])[kbn  ] = f_TS  ;
+    (dist.f[DIR_0MP])[kts  ] = f_BN  ;
+    (dist.f[DIR_000])[kzero] = f_ZERO;
+    (dist.f[DIR_PPP])[ktne ] = f_BSW ;
+    (dist.f[DIR_MMP])[ktsw ] = f_BNE ;
+    (dist.f[DIR_PMP])[ktse ] = f_BNW ;
+    (dist.f[DIR_MPP])[ktnw ] = f_BSE ;
+    (dist.f[DIR_PPM])[kbne ] = f_TSW ;
+    (dist.f[DIR_MMM])[kbsw ] = f_TNE ;
+    (dist.f[DIR_PMM])[kbse ] = f_TNW ;
+    (dist.f[DIR_MPM])[kbnw ] = f_TSE ;
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+
+//! \brief Interpolate the nine precursor populations for boundary node \p k in space and time.
+//!
+//! \p fsLast and \p fsNext each hold nine consecutive arrays of \p numberOfPrecursorNodes entries
+//! (population i starts at offset i*numberOfPrecursorNodes). For every population the value is
+//! interpolated in space from the precursor nodes NT, NB, ST and SB with the given weights and then
+//! blended linearly in time: fInterp[i] = last*(1 - tRatio) + next*tRatio.
+//! Shared by PrecursorDeviceDistributions and QPrecursorDeviceDistributions.
+__device__ inline void interpolatePrecursorDistributions( real (&fInterp)[9],
+                                                          const unsigned k,
+                                                          int numberOfPrecursorNodes,
+                                                          const uint* neighborsNT,
+                                                          const uint* neighborsNB,
+                                                          const uint* neighborsST,
+                                                          const uint* neighborsSB,
+                                                          const real* weightsNT,
+                                                          const real* weightsNB,
+                                                          const real* weightsST,
+                                                          const real* weightsSB,
+                                                          const real* fsLast,
+                                                          const real* fsNext,
+                                                          real tRatio)
+{
+    const uint kNT = neighborsNT[k];
+    const real dNT = weightsNT[k];
+
+    // NOTE(review): a weight >= 1e6 presumably marks a boundary node that coincides with precursor
+    // node NT, in which case only that node is used - confirm against the code that fills the weights.
+    if(dNT<1e6)
+    {
+        const uint kNB = neighborsNB[k];
+        const uint kST = neighborsST[k];
+        const uint kSB = neighborsSB[k];
+
+        const real dNB = weightsNB[k];
+        const real dST = weightsST[k];
+        const real dSB = weightsSB[k];
+
+        const real invWeightSum = 1.f/(dNT+dNB+dST+dSB);
+
+        #pragma unroll
+        for(int i = 0; i < 9; i++)
+        {
+            const real* fLast = &fsLast[i*numberOfPrecursorNodes];
+            const real* fNext = &fsNext[i*numberOfPrecursorNodes];
+
+            const real fLastInterp = (fLast[kNT]*dNT + fLast[kNB]*dNB + fLast[kST]*dST + fLast[kSB]*dSB)*invWeightSum;
+            const real fNextInterp = (fNext[kNT]*dNT + fNext[kNB]*dNB + fNext[kST]*dST + fNext[kSB]*dSB)*invWeightSum;
+
+            fInterp[i] = fLastInterp*(1.f-tRatio) + fNextInterp*tRatio;
+        }
+    } else {
+        #pragma unroll
+        for(int i = 0; i < 9; i++)
+        {
+            const real fLastInterp = fsLast[i*numberOfPrecursorNodes + kNT];
+            const real fNextInterp = fsNext[i*numberOfPrecursorNodes + kNT];
+
+            fInterp[i] = fLastInterp*(1.f-tRatio) + fNextInterp*tRatio;
+        }
+    }
+}
+
+//! \brief Precursor boundary condition that copies nine interpolated precursor populations onto a boundary node.
+//!
+//! One thread handles one boundary node k; threads with k >= numberOfBCnodes return immediately.
+//! The nine populations with a positive x component (DIR_P00, DIR_PP0, DIR_PM0, DIR_P0P, DIR_P0M,
+//! DIR_PPP, DIR_PMP, DIR_PPM, DIR_PMM) of node subgridDistanceIndices[k] are overwritten unconditionally.
+//! \p neighborX is not used by this kernel; it is kept so the launch interface stays unchanged.
+__global__ void PrecursorDeviceDistributions( int* subgridDistanceIndices,
+                                              int numberOfBCnodes,
+                                              int numberOfPrecursorNodes,
+                                              real* distributions,
+                                              uint* neighborX,
+                                              uint* neighborY,
+                                              uint* neighborZ,
+                                              uint* neighborsNT,
+                                              uint* neighborsNB,
+                                              uint* neighborsST,
+                                              uint* neighborsSB,
+                                              real* weightsNT,
+                                              real* weightsNB,
+                                              real* weightsST,
+                                              real* weightsSB,
+                                              real* fsLast,
+                                              real* fsNext,
+                                              real tRatio,
+                                              unsigned long long numberOfLBnodes,
+                                              bool isEvenTimestep)
+{
+    const unsigned k = vf::gpu::getNodeIndex();
+
+    if(k>=numberOfBCnodes) return;
+
+    real fInterp[9];
+    interpolatePrecursorDistributions(fInterp, k, numberOfPrecursorNodes,
+                                      neighborsNT, neighborsNB, neighborsST, neighborsSB,
+                                      weightsNT, weightsNB, weightsST, weightsSB,
+                                      fsLast, fsNext, tRatio);
+
+    Distributions27 dist;
+    getPointersToDistributions(dist, distributions, numberOfLBnodes, isEvenTimestep);
+
+    // node indices of the nine populations that are written
+    unsigned int KQK  = subgridDistanceIndices[k];
+    unsigned int ke   = KQK;
+    unsigned int ks   = neighborY[KQK];
+    unsigned int kb   = neighborZ[KQK];
+    unsigned int kne  = KQK;
+    unsigned int kse  = ks;
+    unsigned int kte  = KQK;
+    unsigned int kbe  = kb;
+    unsigned int kbs  = neighborZ[ks];
+    unsigned int ktse = ks;
+    unsigned int kbse = kbs;
+    unsigned int kbne = kb;
+    unsigned int ktne = KQK;
+
+    dist.f[DIR_P00][ke]   = fInterp[0];
+    dist.f[DIR_PP0][kne]  = fInterp[1];
+    dist.f[DIR_PM0][kse]  = fInterp[2];
+    dist.f[DIR_P0P][kte]  = fInterp[3];
+    dist.f[DIR_P0M][kbe]  = fInterp[4];
+    dist.f[DIR_PPP][ktne] = fInterp[5];
+    dist.f[DIR_PMP][ktse] = fInterp[6];
+    dist.f[DIR_PPM][kbne] = fInterp[7];
+    dist.f[DIR_PMM][kbse] = fInterp[8];
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+//! \brief Variant of PrecursorDeviceDistributions that only writes along links with a valid subgrid distance.
+//!
+//! Same interpolation as PrecursorDeviceDistributions, but a population is only overwritten when the
+//! subgrid distance q stored for its own direction at boundary node k satisfies 0 <= q <= 1.
+//! \p subgridDistances and \p sizeQ are forwarded to getPointersToSubgridDistances.
+//! \p neighborX is not used by this kernel; it is kept so the launch interface stays unchanged.
+__global__ void QPrecursorDeviceDistributions( int* subgridDistanceIndices,
+                                               real* subgridDistances,
+                                               int sizeQ,
+                                               int numberOfBCnodes,
+                                               int numberOfPrecursorNodes,
+                                               real* distributions,
+                                               uint* neighborX,
+                                               uint* neighborY,
+                                               uint* neighborZ,
+                                               uint* neighborsNT,
+                                               uint* neighborsNB,
+                                               uint* neighborsST,
+                                               uint* neighborsSB,
+                                               real* weightsNT,
+                                               real* weightsNB,
+                                               real* weightsST,
+                                               real* weightsSB,
+                                               real* fsLast,
+                                               real* fsNext,
+                                               real tRatio,
+                                               unsigned long long numberOfLBnodes,
+                                               bool isEvenTimestep)
+{
+    const unsigned k = vf::gpu::getNodeIndex();
+
+    if(k>=numberOfBCnodes) return;
+
+    real fInterp[9];
+    interpolatePrecursorDistributions(fInterp, k, numberOfPrecursorNodes,
+                                      neighborsNT, neighborsNB, neighborsST, neighborsSB,
+                                      weightsNT, weightsNB, weightsST, weightsSB,
+                                      fsLast, fsNext, tRatio);
+
+    Distributions27 dist;
+    getPointersToDistributions(dist, distributions, numberOfLBnodes, isEvenTimestep);
+
+    // node indices of the nine populations that may be written
+    unsigned int KQK  = subgridDistanceIndices[k];
+    unsigned int ke   = KQK;
+    unsigned int ks   = neighborY[KQK];
+    unsigned int kb   = neighborZ[KQK];
+    unsigned int kne  = KQK;
+    unsigned int kse  = ks;
+    unsigned int kte  = KQK;
+    unsigned int kbe  = kb;
+    unsigned int kbs  = neighborZ[ks];
+    unsigned int ktse = ks;
+    unsigned int kbse = kbs;
+    unsigned int kbne = kb;
+    unsigned int ktne = KQK;
+
+    SubgridDistances27 qs;
+    getPointersToSubgridDistances(qs, subgridDistances, sizeQ);
+
+    real q;
+    q = qs.q[DIR_P00][k]; if(q>= c0o1 && q <= c1o1) dist.f[DIR_P00][ke]   = fInterp[0];
+    q = qs.q[DIR_PP0][k]; if(q>= c0o1 && q <= c1o1) dist.f[DIR_PP0][kne]  = fInterp[1];
+    q = qs.q[DIR_PM0][k]; if(q>= c0o1 && q <= c1o1) dist.f[DIR_PM0][kse]  = fInterp[2];
+    q = qs.q[DIR_P0P][k]; if(q>= c0o1 && q <= c1o1) dist.f[DIR_P0P][kte]  = fInterp[3];
+    q = qs.q[DIR_P0M][k]; if(q>= c0o1 && q <= c1o1) dist.f[DIR_P0M][kbe]  = fInterp[4];
+    q = qs.q[DIR_PPP][k]; if(q>= c0o1 && q <= c1o1) dist.f[DIR_PPP][ktne] = fInterp[5];
+    q = qs.q[DIR_PMP][k]; if(q>= c0o1 && q <= c1o1) dist.f[DIR_PMP][ktse] = fInterp[6];
+    q = qs.q[DIR_PPM][k]; if(q>= c0o1 && q <= c1o1) dist.f[DIR_PPM][kbne] = fInterp[7];
+    q = qs.q[DIR_PMM][k]; if(q>= c0o1 && q <= c1o1) dist.f[DIR_PMM][kbse] = fInterp[8];
+
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
diff --git a/src/gpu/VirtualFluids_GPU/GPU/PressBCs27.cu b/src/gpu/VirtualFluids_GPU/GPU/PressBCs27.cu
index ccb2ce79c63515e59e4f9ae75016f44ced71a170..29e82196bdc2a22f03306b97a1ffd1bb6d5bc8a4 100644
--- a/src/gpu/VirtualFluids_GPU/GPU/PressBCs27.cu
+++ b/src/gpu/VirtualFluids_GPU/GPU/PressBCs27.cu
@@ -2,6 +2,9 @@
 #include "LBM/LB.h"
 #include "lbm/constants/D3Q27.h"
 #include "lbm/constants/NumericConstants.h"
+#include "lbm/MacroscopicQuantities.h"
+#include "Kernel/Utilities/DistributionHelper.cuh"
+
 #include "KernelUtilities.h"
 using namespace vf::lbm::constant;
@@ -2793,12 +2796,14 @@ __global__ void QPressDeviceDirDepBot27( real* rhoBC,
-
-
+__host__ __device__ real computeOutflowDistribution(const real* const &f, const real* const &f1,
const int dir, const real cs) +{ + return f1[dir] * cs + (c1o1 - cs) * f[dir]; +} //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -__global__ void QPressNoRhoDevice27( real* rhoBC, - real* DD, +__global__ void QPressNoRhoDevice27( real* rhoBC, + real* distributions, int* k_Q, int* k_N, int numberOfBCnodes, @@ -2806,238 +2811,176 @@ __global__ void QPressNoRhoDevice27( real* rhoBC, unsigned int* neighborX, unsigned int* neighborY, unsigned int* neighborZ, - unsigned int size_Mat, - bool isEvenTimestep) + unsigned int numberOfLBnodes, + bool isEvenTimestep, + int direction) { //////////////////////////////////////////////////////////////////////////////// - const unsigned x = threadIdx.x; // Globaler x-Index - const unsigned y = blockIdx.x; // Globaler y-Index - const unsigned z = blockIdx.y; // Globaler z-Index - const unsigned nx = blockDim.x; - const unsigned ny = gridDim.x; - const unsigned k = nx*(ny*z + y) + x; + const unsigned k = vf::gpu::getNodeIndex(); ////////////////////////////////////////////////////////////////////////// - if(k<numberOfBCnodes) - { - //////////////////////////////////////////////////////////////////////////////// - //index - unsigned int KQK = k_Q[k]; - //unsigned int kzero= KQK; - unsigned int ke = KQK; - unsigned int kw = neighborX[KQK]; - unsigned int kn = KQK; - unsigned int ks = neighborY[KQK]; - unsigned int kt = KQK; - unsigned int kb = neighborZ[KQK]; - unsigned int ksw = neighborY[kw]; - unsigned int kne = KQK; - unsigned int kse = ks; - unsigned int knw = kw; - unsigned int kbw = neighborZ[kw]; - unsigned int kte = KQK; - unsigned int kbe = kb; - unsigned int ktw = kw; - unsigned int kbs = neighborZ[ks]; - unsigned int ktn = KQK; - unsigned int kbn = kb; - unsigned int kts = ks; - unsigned int ktse = ks; - unsigned int kbnw = kbw; - unsigned int ktnw = kw; - unsigned int kbse = kbs; - unsigned int ktsw = ksw; - unsigned int 
kbne = kb; - unsigned int ktne = KQK; - unsigned int kbsw = neighborZ[ksw]; - //////////////////////////////////////////////////////////////////////////////// - //index1 - unsigned int K1QK = k_N[k]; - //unsigned int k1zero= K1QK; - unsigned int k1e = K1QK; - unsigned int k1w = neighborX[K1QK]; - unsigned int k1n = K1QK; - unsigned int k1s = neighborY[K1QK]; - unsigned int k1t = K1QK; - unsigned int k1b = neighborZ[K1QK]; - unsigned int k1sw = neighborY[k1w]; - unsigned int k1ne = K1QK; - unsigned int k1se = k1s; - unsigned int k1nw = k1w; - unsigned int k1bw = neighborZ[k1w]; - unsigned int k1te = K1QK; - unsigned int k1be = k1b; - unsigned int k1tw = k1w; - unsigned int k1bs = neighborZ[k1s]; - unsigned int k1tn = K1QK; - unsigned int k1bn = k1b; - unsigned int k1ts = k1s; - unsigned int k1tse = k1s; - unsigned int k1bnw = k1bw; - unsigned int k1tnw = k1w; - unsigned int k1bse = k1bs; - unsigned int k1tsw = k1sw; - unsigned int k1bne = k1b; - unsigned int k1tne = K1QK; - unsigned int k1bsw = neighborZ[k1sw]; - //////////////////////////////////////////////////////////////////////////////// - Distributions27 D; - if (isEvenTimestep==true) - { - D.f[DIR_P00 ] = &DD[DIR_P00 *size_Mat]; - D.f[DIR_M00 ] = &DD[DIR_M00 *size_Mat]; - D.f[DIR_0P0 ] = &DD[DIR_0P0 *size_Mat]; - D.f[DIR_0M0 ] = &DD[DIR_0M0 *size_Mat]; - D.f[DIR_00P ] = &DD[DIR_00P *size_Mat]; - D.f[DIR_00M ] = &DD[DIR_00M *size_Mat]; - D.f[DIR_PP0 ] = &DD[DIR_PP0 *size_Mat]; - D.f[DIR_MM0 ] = &DD[DIR_MM0 *size_Mat]; - D.f[DIR_PM0 ] = &DD[DIR_PM0 *size_Mat]; - D.f[DIR_MP0 ] = &DD[DIR_MP0 *size_Mat]; - D.f[DIR_P0P ] = &DD[DIR_P0P *size_Mat]; - D.f[DIR_M0M ] = &DD[DIR_M0M *size_Mat]; - D.f[DIR_P0M ] = &DD[DIR_P0M *size_Mat]; - D.f[DIR_M0P ] = &DD[DIR_M0P *size_Mat]; - D.f[DIR_0PP ] = &DD[DIR_0PP *size_Mat]; - D.f[DIR_0MM ] = &DD[DIR_0MM *size_Mat]; - D.f[DIR_0PM ] = &DD[DIR_0PM *size_Mat]; - D.f[DIR_0MP ] = &DD[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DD[DIR_000*size_Mat]; - D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat]; 
- D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat]; - D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat]; - D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat]; - D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat]; - D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat]; - D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat]; - D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat]; - } - else - { - D.f[DIR_M00 ] = &DD[DIR_P00 *size_Mat]; - D.f[DIR_P00 ] = &DD[DIR_M00 *size_Mat]; - D.f[DIR_0M0 ] = &DD[DIR_0P0 *size_Mat]; - D.f[DIR_0P0 ] = &DD[DIR_0M0 *size_Mat]; - D.f[DIR_00M ] = &DD[DIR_00P *size_Mat]; - D.f[DIR_00P ] = &DD[DIR_00M *size_Mat]; - D.f[DIR_MM0 ] = &DD[DIR_PP0 *size_Mat]; - D.f[DIR_PP0 ] = &DD[DIR_MM0 *size_Mat]; - D.f[DIR_MP0 ] = &DD[DIR_PM0 *size_Mat]; - D.f[DIR_PM0 ] = &DD[DIR_MP0 *size_Mat]; - D.f[DIR_M0M ] = &DD[DIR_P0P *size_Mat]; - D.f[DIR_P0P ] = &DD[DIR_M0M *size_Mat]; - D.f[DIR_M0P ] = &DD[DIR_P0M *size_Mat]; - D.f[DIR_P0M ] = &DD[DIR_M0P *size_Mat]; - D.f[DIR_0MM ] = &DD[DIR_0PP *size_Mat]; - D.f[DIR_0PP ] = &DD[DIR_0MM *size_Mat]; - D.f[DIR_0MP ] = &DD[DIR_0PM *size_Mat]; - D.f[DIR_0PM ] = &DD[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DD[DIR_000*size_Mat]; - D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat]; - D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat]; - D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat]; - D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat]; - D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat]; - D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat]; - D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat]; - D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat]; - } - ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - real f1_E = (D.f[DIR_P00 ])[k1e ]; - real f1_W = (D.f[DIR_M00 ])[k1w ]; - real f1_N = (D.f[DIR_0P0 ])[k1n ]; - real f1_S = (D.f[DIR_0M0 ])[k1s ]; - real f1_T = (D.f[DIR_00P ])[k1t ]; - real f1_B = (D.f[DIR_00M ])[k1b ]; - real f1_NE = (D.f[DIR_PP0 ])[k1ne ]; - real f1_SW = (D.f[DIR_MM0 ])[k1sw ]; - real f1_SE = (D.f[DIR_PM0 ])[k1se ]; - real f1_NW = (D.f[DIR_MP0 ])[k1nw ]; - real f1_TE = 
(D.f[DIR_P0P ])[k1te ]; - real f1_BW = (D.f[DIR_M0M ])[k1bw ]; - real f1_BE = (D.f[DIR_P0M ])[k1be ]; - real f1_TW = (D.f[DIR_M0P ])[k1tw ]; - real f1_TN = (D.f[DIR_0PP ])[k1tn ]; - real f1_BS = (D.f[DIR_0MM ])[k1bs ]; - real f1_BN = (D.f[DIR_0PM ])[k1bn ]; - real f1_TS = (D.f[DIR_0MP ])[k1ts ]; - //real f1_ZERO = (D.f[DIR_000])[k1zero]; - real f1_TNE = (D.f[DIR_PPP ])[k1tne ]; - real f1_TSW = (D.f[DIR_MMP ])[k1tsw ]; - real f1_TSE = (D.f[DIR_PMP ])[k1tse ]; - real f1_TNW = (D.f[DIR_MPP ])[k1tnw ]; - real f1_BNE = (D.f[DIR_PPM ])[k1bne ]; - real f1_BSW = (D.f[DIR_MMM ])[k1bsw ]; - real f1_BSE = (D.f[DIR_PMM ])[k1bse ]; - real f1_BNW = (D.f[DIR_MPM ])[k1bnw ]; - ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - real f_E = (D.f[DIR_P00 ])[ke ]; - real f_W = (D.f[DIR_M00 ])[kw ]; - real f_N = (D.f[DIR_0P0 ])[kn ]; - real f_S = (D.f[DIR_0M0 ])[ks ]; - real f_T = (D.f[DIR_00P ])[kt ]; - real f_B = (D.f[DIR_00M ])[kb ]; - real f_NE = (D.f[DIR_PP0 ])[kne ]; - real f_SW = (D.f[DIR_MM0 ])[ksw ]; - real f_SE = (D.f[DIR_PM0 ])[kse ]; - real f_NW = (D.f[DIR_MP0 ])[knw ]; - real f_TE = (D.f[DIR_P0P ])[kte ]; - real f_BW = (D.f[DIR_M0M ])[kbw ]; - real f_BE = (D.f[DIR_P0M ])[kbe ]; - real f_TW = (D.f[DIR_M0P ])[ktw ]; - real f_TN = (D.f[DIR_0PP ])[ktn ]; - real f_BS = (D.f[DIR_0MM ])[kbs ]; - real f_BN = (D.f[DIR_0PM ])[kbn ]; - real f_TS = (D.f[DIR_0MP ])[kts ]; - //real f_ZERO = (D.f[DIR_000])[kzero]; - real f_TNE = (D.f[DIR_PPP ])[ktne ]; - real f_TSW = (D.f[DIR_MMP ])[ktsw ]; - real f_TSE = (D.f[DIR_PMP ])[ktse ]; - real f_TNW = (D.f[DIR_MPP ])[ktnw ]; - real f_BNE = (D.f[DIR_PPM ])[kbne ]; - real f_BSW = (D.f[DIR_MMM ])[kbsw ]; - real f_BSE = (D.f[DIR_PMM ])[kbse ]; - real f_BNW = (D.f[DIR_MPM ])[kbnw ]; - ////////////////////////////////////////////////////////////////////////// + if(k>=numberOfBCnodes) return; - //real vx1, vx2, vx3, drho; - //real vx1, vx2, vx3, drho, 
drho1; - ////////////////////////////////////////////////////////////////////////// - //Dichte - // drho1 = f1_TSE + f1_TNW + f1_TNE + f1_TSW + f1_BSE + f1_BNW + f1_BNE + f1_BSW + - // f1_BN + f1_TS + f1_TN + f1_BS + f1_BE + f1_TW + f1_TE + f1_BW + f1_SE + f1_NW + f1_NE + f1_SW + - // f1_T + f1_B + f1_N + f1_S + f1_E + f1_W + ((D.f[DIR_000])[k1zero]); - // drho = f_TSE + f_TNW + f_TNE + f_TSW + f_BSE + f_BNW + f_BNE + f_BSW + - // f_BN + f_TS + f_TN + f_BS + f_BE + f_TW + f_TE + f_BW + f_SE + f_NW + f_NE + f_SW + - // f_T + f_B + f_N + f_S + f_E + f_W + ((D.f[DIR_000])[kzero]); - - ////////////////////////////////////////////////////////////////////////// - //Ux + //////////////////////////////////////////////////////////////////////////////// + //index + unsigned int KQK = k_Q[k]; + // unsigned int kzero= KQK; + unsigned int ke = KQK; + unsigned int kw = neighborX[KQK]; + unsigned int kn = KQK; + unsigned int ks = neighborY[KQK]; + unsigned int kt = KQK; + unsigned int kb = neighborZ[KQK]; + unsigned int ksw = neighborY[kw]; + unsigned int kne = KQK; + unsigned int kse = ks; + unsigned int knw = kw; + unsigned int kbw = neighborZ[kw]; + unsigned int kte = KQK; + unsigned int kbe = kb; + unsigned int ktw = kw; + unsigned int kbs = neighborZ[ks]; + unsigned int ktn = KQK; + unsigned int kbn = kb; + unsigned int kts = ks; + unsigned int ktse = ks; + unsigned int kbnw = kbw; + unsigned int ktnw = kw; + unsigned int kbse = kbs; + unsigned int ktsw = ksw; + unsigned int kbne = kb; + unsigned int ktne = KQK; + unsigned int kbsw = neighborZ[ksw]; + //////////////////////////////////////////////////////////////////////////////// + //index1 + unsigned int K1QK = k_N[k]; + //unsigned int k1zero= K1QK; + unsigned int k1e = K1QK; + unsigned int k1w = neighborX[K1QK]; + unsigned int k1n = K1QK; + unsigned int k1s = neighborY[K1QK]; + unsigned int k1t = K1QK; + unsigned int k1b = neighborZ[K1QK]; + unsigned int k1sw = neighborY[k1w]; + unsigned int k1ne = K1QK; + unsigned int 
k1se = k1s; + unsigned int k1nw = k1w; + unsigned int k1bw = neighborZ[k1w]; + unsigned int k1te = K1QK; + unsigned int k1be = k1b; + unsigned int k1tw = k1w; + unsigned int k1bs = neighborZ[k1s]; + unsigned int k1tn = K1QK; + unsigned int k1bn = k1b; + unsigned int k1ts = k1s; + unsigned int k1tse = k1s; + unsigned int k1bnw = k1bw; + unsigned int k1tnw = k1w; + unsigned int k1bse = k1bs; + unsigned int k1tsw = k1sw; + unsigned int k1bne = k1b; + unsigned int k1tne = K1QK; + unsigned int k1bsw = neighborZ[k1sw]; + //////////////////////////////////////////////////////////////////////////////// + Distributions27 dist; + getPointersToDistributions(dist, distributions, numberOfLBnodes, isEvenTimestep); + real f[27], f1[27]; + ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + f1[DIR_P00] = (dist.f[DIR_P00])[k1e ]; + f1[DIR_M00] = (dist.f[DIR_M00])[k1w ]; + f1[DIR_0P0] = (dist.f[DIR_0P0])[k1n ]; + f1[DIR_0M0] = (dist.f[DIR_0M0])[k1s ]; + f1[DIR_00P] = (dist.f[DIR_00P])[k1t ]; + f1[DIR_00M] = (dist.f[DIR_00M])[k1b ]; + f1[DIR_PP0] = (dist.f[DIR_PP0])[k1ne ]; + f1[DIR_MM0] = (dist.f[DIR_MM0])[k1sw ]; + f1[DIR_PM0] = (dist.f[DIR_PM0])[k1se ]; + f1[DIR_MP0] = (dist.f[DIR_MP0])[k1nw ]; + f1[DIR_P0P] = (dist.f[DIR_P0P])[k1te ]; + f1[DIR_M0M] = (dist.f[DIR_M0M])[k1bw ]; + f1[DIR_P0M] = (dist.f[DIR_P0M])[k1be ]; + f1[DIR_M0P] = (dist.f[DIR_M0P])[k1tw ]; + f1[DIR_0PP] = (dist.f[DIR_0PP])[k1tn ]; + f1[DIR_0MM] = (dist.f[DIR_0MM])[k1bs ]; + f1[DIR_0PM] = (dist.f[DIR_0PM])[k1bn ]; + f1[DIR_0MP] = (dist.f[DIR_0MP])[k1ts ]; + // f1[DIR_000] = (dist.f[DIR_000])[k1zero]; + f1[DIR_PPP] = (dist.f[DIR_PPP])[k1tne ]; + f1[DIR_MMP] = (dist.f[DIR_MMP])[k1tsw ]; + f1[DIR_PMP] = (dist.f[DIR_PMP])[k1tse ]; + f1[DIR_MPP] = (dist.f[DIR_MPP])[k1tnw ]; + f1[DIR_PPM] = (dist.f[DIR_PPM])[k1bne ]; + f1[DIR_MMM] = (dist.f[DIR_MMM])[k1bsw ]; + f1[DIR_PMM] = (dist.f[DIR_PMM])[k1bse ]; + f1[DIR_MPM] = 
(dist.f[DIR_MPM])[k1bnw ]; + ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + f[DIR_P00] = (dist.f[DIR_P00])[ke ]; + f[DIR_M00] = (dist.f[DIR_M00])[kw ]; + f[DIR_0P0] = (dist.f[DIR_0P0])[kn ]; + f[DIR_0M0] = (dist.f[DIR_0M0])[ks ]; + f[DIR_00P] = (dist.f[DIR_00P])[kt ]; + f[DIR_00M] = (dist.f[DIR_00M])[kb ]; + f[DIR_PP0] = (dist.f[DIR_PP0])[kne ]; + f[DIR_MM0] = (dist.f[DIR_MM0])[ksw ]; + f[DIR_PM0] = (dist.f[DIR_PM0])[kse ]; + f[DIR_MP0] = (dist.f[DIR_MP0])[knw ]; + f[DIR_P0P] = (dist.f[DIR_P0P])[kte ]; + f[DIR_M0M] = (dist.f[DIR_M0M])[kbw ]; + f[DIR_P0M] = (dist.f[DIR_P0M])[kbe ]; + f[DIR_M0P] = (dist.f[DIR_M0P])[ktw ]; + f[DIR_0PP] = (dist.f[DIR_0PP])[ktn ]; + f[DIR_0MM] = (dist.f[DIR_0MM])[kbs ]; + f[DIR_0PM] = (dist.f[DIR_0PM])[kbn ]; + f[DIR_0MP] = (dist.f[DIR_0MP])[kts ]; + // f[DIR_000] = (dist.f[DIR_000])[kzero]; + f[DIR_PPP] = (dist.f[DIR_PPP])[ktne ]; + f[DIR_MMP] = (dist.f[DIR_MMP])[ktsw ]; + f[DIR_PMP] = (dist.f[DIR_PMP])[ktse ]; + f[DIR_MPP] = (dist.f[DIR_MPP])[ktnw ]; + f[DIR_PPM] = (dist.f[DIR_PPM])[kbne ]; + f[DIR_MMM] = (dist.f[DIR_MMM])[kbsw ]; + f[DIR_PMM] = (dist.f[DIR_PMM])[kbse ]; + f[DIR_MPM] = (dist.f[DIR_MPM])[kbnw ]; + ////////////////////////////////////////////////////////////////////////// - //vx1 = (((f_TSE - f_BNW) - (f_TNW - f_BSE)) + ((f_TNE - f_BSW) - (f_TSW - f_BNE)) + + //real vx1, vx2, vx3, drho; + //real vx1, vx2, vx3, drho, drho1; + ////////////////////////////////////////////////////////////////////////// + ////Dichte + // drho1 = f1_TSE + f1_TNW + f1_TNE + f1_TSW + f1_BSE + f1_BNW + f1_BNE + f1_BSW + + // f1_BN + f1_TS + f1_TN + f1_BS + f1_BE + f1_TW + f1_TE + f1_BW + f1_SE + f1_NW + f1_NE + f1_SW + + // f1_T + f1_B + f1_N + f1_S + f1_E + f1_W + ((D.f[DIR_000])[k1zero]); + // drho = f_TSE + f_TNW + f_TNE + f_TSW + f_BSE + f_BNW + f_BNE + f_BSW + + // f_BN + f_TS + f_TN + f_BS + f_BE + f_TW + f_TE + f_BW + f_SE + f_NW + 
f_NE + f_SW + + // f_T + f_B + f_N + f_S + f_E + f_W + ((D.f[DIR_000])[kzero]); + + ////////////////////////////////////////////////////////////////////////// + ////Ux + + //vx1 = (((f_TSE - f_BNW) - (f_TNW - f_BSE)) + ((f_TNE - f_BSW) - (f_TSW - f_BNE)) + // ((f_BE - f_TW) + (f_TE - f_BW)) + ((f_SE - f_NW) + (f_NE - f_SW)) + // (f_E - f_W)) /(one + drho); - // vx2 = ((-(f_TSE - f_BNW) + (f_TNW - f_BSE)) + ((f_TNE - f_BSW) - (f_TSW - f_BNE)) + + //vx2 = ((-(f_TSE - f_BNW) + (f_TNW - f_BSE)) + ((f_TNE - f_BSW) - (f_TSW - f_BNE)) + // ((f_BN - f_TS) + (f_TN - f_BS)) + (-(f_SE - f_NW) + (f_NE - f_SW)) + // (f_N - f_S)) /(one + drho); - // vx3 = (((f_TSE - f_BNW) + (f_TNW - f_BSE)) + ((f_TNE - f_BSW) + (f_TSW - f_BNE)) + + //vx3 = (((f_TSE - f_BNW) + (f_TNW - f_BSE)) + ((f_TNE - f_BSW) + (f_TSW - f_BNE)) + // (-(f_BN - f_TS) + (f_TN - f_BS)) + ((f_TE - f_BW) - (f_BE - f_TW)) + // (f_T - f_B)) /(one + drho); - //real cu_sq=c3o2*(vx1*vx1+vx2*vx2+vx3*vx3); + //real cu_sq=c3o2*(vx1*vx1+vx2*vx2+vx3*vx3); - // ////////////////////////////////////////////////////////////////////////// - ////real omega = om1; + ////////////////////////////////////////////////////////////////////////// + ////real omega = om1; // real cusq = c3o2*(vx1*vx1+vx2*vx2+vx3*vx3); // ////////////////////////////////////////////////////////////////////////// - ////T�st MK - ////if(vx1 < zero) vx1 = zero; + ////T�st MK + ////if(vx1 < zero) vx1 = zero; // ////////////////////////////////////////////////////////////////////////// // real fZERO = c8over27* (drho1-(one + drho1)*(cusq)) ; // real fE = c2over27* (drho1+(one + drho1)*(three*( vx1 )+c9over2*( vx1 )*( vx1 )-cusq)); @@ -3050,10 +2993,75 @@ __global__ void QPressNoRhoDevice27( real* rhoBC, // real fSW = c1over54* (drho1+(one + drho1)*(three*(-vx1-vx2 )+c9over2*(-vx1-vx2 )*(-vx1-vx2 )-cusq)); // real fSE = c1over54* (drho1+(one + drho1)*(three*( vx1-vx2 )+c9over2*( vx1-vx2 )*( vx1-vx2 )-cusq)); // real fNW = c1over54* (drho1+(one + 
drho1)*(three*(-vx1+vx2 )+c9over2*(-vx1+vx2 )*(-vx1+vx2 )-cusq)); - // real fTE = c1over54* (drho1+(one + drho1)*(three*( vx1 +vx3)+c9over2*( vx1 +vx3)*( vx1 +vx3)-cusq)); - // real fBW = c1over54* (drho1+(one + drho1)*(three*(-vx1 -vx3)+c9over2*(-vx1 -vx3)*(-vx1 -vx3)-cusq)); - // real fBE = c1over54* (drho1+(one + drho1)*(three*( vx1 -vx3)+c9over2*( vx1 -vx3)*( vx1 -vx3)-cusq)); - // real fTW = c1over54* (drho1+(one + drho1)*(three*(-vx1 +vx3)+c9over2*(-vx1 +vx3)*(-vx1 +vx3)-cusq)); + // real fTE ///////////////////////////////////////////////////////////// + //with velocity + //if(true){//vx1 >= zero){ + // real csMvx = one / sqrtf(three) - vx1; + // //real csMvy = one / sqrtf(three) - vx2; + // /////////////////////////////////////////// + // // X + // f_W = f1_W * csMvx + (one - csMvx) * f_W ;//- c2over27 * ((drho + drho1)*c1o2-((drho + drho1)*c1o2 )*three*vx1); + // f_NW = f1_NW * csMvx + (one - csMvx) * f_NW ;//- c1over54 * ((drho + drho1)*c1o2-((drho + drho1)*c1o2 )*three*vx1); + // f_SW = f1_SW * csMvx + (one - csMvx) * f_SW ;//- c1over54 * ((drho + drho1)*c1o2-((drho + drho1)*c1o2 )*three*vx1); + // f_TW = f1_TW * csMvx + (one - csMvx) * f_TW ;//- c1over54 * ((drho + drho1)*c1o2-((drho + drho1)*c1o2 )*three*vx1); + // f_BW = f1_BW * csMvx + (one - csMvx) * f_BW ;//- c1over54 * ((drho + drho1)*c1o2-((drho + drho1)*c1o2 )*three*vx1); + // f_TNW = f1_TNW * csMvx + (one - csMvx) * f_TNW ;//- c1over216 * ((drho + drho1)*c1o2-((drho + drho1)*c1o2 )*three*vx1); + // f_TSW = f1_TSW * csMvx + (one - csMvx) * f_TSW ;//- c1over216 * ((drho + drho1)*c1o2-((drho + drho1)*c1o2 )*three*vx1); + // f_BNW = f1_BNW * csMvx + (one - csMvx) * f_BNW ;//- c1over216 * ((drho + drho1)*c1o2-((drho + drho1)*c1o2 )*three*vx1); + // f_BSW = f1_BSW * csMvx + (one - csMvx) * f_BSW ;//- c1over216 * ((drho + drho1)*c1o2-((drho + drho1)*c1o2 )*three*vx1); + // /////////////////////////////////////////// + // // Y + // //f_S = f1_S * csMvy + (one - csMvy) * f_S ;//- c2over27 * ((drho + 
drho1)*c1o2-((drho + drho1)*c1o2 )*three*vx2); + // //f_SE = f1_SE * csMvy + (one - csMvy) * f_SE ;//- c1over54 * ((drho + drho1)*c1o2-((drho + drho1)*c1o2 )*three*vx2); + // //f_SW = f1_SW * csMvy + (one - csMvy) * f_SW ;//- c1over54 * ((drho + drho1)*c1o2-((drho + drho1)*c1o2 )*three*vx2); + // //f_TS = f1_TS * csMvy + (one - csMvy) * f_TS ;//- c1over54 * ((drho + drho1)*c1o2-((drho + drho1)*c1o2 )*three*vx2); + // //f_BS = f1_BS * csMvy + (one - csMvy) * f_BS ;//- c1over54 * ((drho + drho1)*c1o2-((drho + drho1)*c1o2 )*three*vx2); + // //f_TSE = f1_TSE * csMvy + (one - csMvy) * f_TSE ;//- c1over216 * ((drho + drho1)*c1o2-((drho + drho1)*c1o2 )*three*vx2); + // //f_TSW = f1_TSW * csMvy + (one - csMvy) * f_TSW ;//- c1over216 * ((drho + drho1)*c1o2-((drho + drho1)*c1o2 )*three*vx2); + // //f_BSE = f1_BSE * csMvy + (one - csMvy) * f_BSE ;//- c1over216 * ((drho + drho1)*c1o2-((drho + drho1)*c1o2 )*three*vx2); + // //f_BSW = f1_BSW * csMvy + (one - csMvy) * f_BSW ;//- c1over216 * ((drho + drho1)*c1o2-((drho + drho1)*c1o2 )*three*vx2); + // //f_S = f1_S * csMvy + (one - csMvy) * f_S; + // //f_SE = f1_SE * csMvy + (one - csMvy) * f_SE; + // //f_SW = f1_SW * csMvy + (one - csMvy) * f_SW; + // //f_TS = f1_TS * csMvy + (one - csMvy) * f_TS; + // //f_BS = f1_BS * csMvy + (one - csMvy) * f_BS; + // //f_TSE = f1_TSE * csMvy + (one - csMvy) * f_TSE; + // //f_TSW = f1_TSW * csMvy + (one - csMvy) * f_TSW; + // //f_BSE = f1_BSE * csMvy + (one - csMvy) * f_BSE; + // //f_BSW = f1_BSW * csMvy + (one - csMvy) * f_BSW; + // ////////////////////////////////////////////////////////////////////////// + //} + //else + //{ + // /////////////////////////////////////////// + // // X + // vx1 = vx1 * 0.9; + // f_W = f_E - six * c2over27 * ( vx1 ); + // f_NW = f_SE - six * c1over54 * ( vx1-vx2 ); + // f_SW = f_NE - six * c1over54 * ( vx1+vx2 ); + // f_TW = f_BE - six * c1over54 * ( vx1 -vx3); + // f_BW = f_TE - six * c1over54 * ( vx1 +vx3); + // f_TNW = f_BSE - six * c1over216 * ( vx1-vx2-vx3); 
+ // f_TSW = f_BNE - six * c1over216 * ( vx1+vx2-vx3); + // f_BNW = f_TSE - six * c1over216 * ( vx1-vx2+vx3); + // f_BSW = f_TNE - six * c1over216 * ( vx1+vx2+vx3); + // /////////////////////////////////////////// + // // Y + // //vx2 = vx2 * 0.9; + // //f_S = f_N - six * c2over27 * ( vx2 ); + // //f_SE = f_NW - six * c1over54 * (-vx1+vx2 ); + // //f_SW = f_NE - six * c1over54 * ( vx1+vx2 ); + // //f_TS = f_BN - six * c1over54 * ( vx2-vx3); + // //f_BS = f_TN - six * c1over54 * ( vx2+vx3); + // //f_TSE = f_BNW - six * c1over216 * (-vx1+vx2-vx3); + // //f_TSW = f_BNE - six * c1over216 * ( vx1+vx2-vx3); + // //f_BSE = f_TNW - six * c1over216 * (-vx1+vx2+vx3); + // //f_BSW = f_TNE - six * c1over216 * ( vx1+vx2+vx3); + // /////////////////////////////////////////// + //} + ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + + // = c1over54* (drho1+(one + drho1)*(three*(-vx1 +vx3)+c9over2*(-vx1 +vx3)*(-vx1 +vx3)-cusq)); // real fTN = c1over54* (drho1+(one + drho1)*(three*( vx2+vx3)+c9over2*( vx2+vx3)*( vx2+vx3)-cusq)); // real fBS = c1over54* (drho1+(one + drho1)*(three*( -vx2-vx3)+c9over2*( -vx2-vx3)*( -vx2-vx3)-cusq)); // real fBN = c1over54* (drho1+(one + drho1)*(three*( vx2-vx3)+c9over2*( vx2-vx3)*( vx2-vx3)-cusq)); @@ -3067,222 +3075,322 @@ __global__ void QPressNoRhoDevice27( real* rhoBC, // real fBSE = c1over216* (drho1+(one + drho1)*(three*( vx1-vx2-vx3)+c9over2*( vx1-vx2-vx3)*( vx1-vx2-vx3)-cusq)); // real fTNW = c1over216* (drho1+(one + drho1)*(three*(-vx1+vx2+vx3)+c9over2*(-vx1+vx2+vx3)*(-vx1+vx2+vx3)-cusq)); - real cs = c1o1 / sqrtf(c3o1); - ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - //no velocity - ////////////////////////////////////////// - f_E = f1_E * cs + (c1o1 - cs) * f_E ; - f_W = f1_W * cs + (c1o1 - cs) * f_W ; - f_N = f1_N * cs + (c1o1 - cs) * f_N ; - f_S = f1_S * cs + (c1o1 - cs) * 
f_S ; - f_T = f1_T * cs + (c1o1 - cs) * f_T ; - f_B = f1_B * cs + (c1o1 - cs) * f_B ; - f_NE = f1_NE * cs + (c1o1 - cs) * f_NE ; - f_SW = f1_SW * cs + (c1o1 - cs) * f_SW ; - f_SE = f1_SE * cs + (c1o1 - cs) * f_SE ; - f_NW = f1_NW * cs + (c1o1 - cs) * f_NW ; - f_TE = f1_TE * cs + (c1o1 - cs) * f_TE ; - f_BW = f1_BW * cs + (c1o1 - cs) * f_BW ; - f_BE = f1_BE * cs + (c1o1 - cs) * f_BE ; - f_TW = f1_TW * cs + (c1o1 - cs) * f_TW ; - f_TN = f1_TN * cs + (c1o1 - cs) * f_TN ; - f_BS = f1_BS * cs + (c1o1 - cs) * f_BS ; - f_BN = f1_BN * cs + (c1o1 - cs) * f_BN ; - f_TS = f1_TS * cs + (c1o1 - cs) * f_TS ; - f_TNE = f1_TNE * cs + (c1o1 - cs) * f_TNE ; - f_TSW = f1_TSW * cs + (c1o1 - cs) * f_TSW ; - f_TSE = f1_TSE * cs + (c1o1 - cs) * f_TSE ; - f_TNW = f1_TNW * cs + (c1o1 - cs) * f_TNW ; - f_BNE = f1_BNE * cs + (c1o1 - cs) * f_BNE ; - f_BSW = f1_BSW * cs + (c1o1 - cs) * f_BSW ; - f_BSE = f1_BSE * cs + (c1o1 - cs) * f_BSE ; - f_BNW = f1_BNW * cs + (c1o1 - cs) * f_BNW ; - ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - - ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - //with velocity - //if(true){//vx1 >= zero){ - // real csMvx = one / sqrtf(three) - vx1; - // //real csMvy = one / sqrtf(three) - vx2; - // /////////////////////////////////////////// - // // X - // f_W = f1_W * csMvx + (one - csMvx) * f_W ;//- c2over27 * ((drho + drho1)*c1o2-((drho + drho1)*c1o2 )*three*vx1); - // f_NW = f1_NW * csMvx + (one - csMvx) * f_NW ;//- c1over54 * ((drho + drho1)*c1o2-((drho + drho1)*c1o2 )*three*vx1); - // f_SW = f1_SW * csMvx + (one - csMvx) * f_SW ;//- c1over54 * ((drho + drho1)*c1o2-((drho + drho1)*c1o2 )*three*vx1); - // f_TW = f1_TW * csMvx + (one - csMvx) * f_TW ;//- c1over54 * ((drho + drho1)*c1o2-((drho + drho1)*c1o2 )*three*vx1); - // f_BW = f1_BW * csMvx + (one - csMvx) * f_BW ;//- c1over54 * ((drho + 
drho1)*c1o2-((drho + drho1)*c1o2 )*three*vx1); - // f_TNW = f1_TNW * csMvx + (one - csMvx) * f_TNW ;//- c1over216 * ((drho + drho1)*c1o2-((drho + drho1)*c1o2 )*three*vx1); - // f_TSW = f1_TSW * csMvx + (one - csMvx) * f_TSW ;//- c1over216 * ((drho + drho1)*c1o2-((drho + drho1)*c1o2 )*three*vx1); - // f_BNW = f1_BNW * csMvx + (one - csMvx) * f_BNW ;//- c1over216 * ((drho + drho1)*c1o2-((drho + drho1)*c1o2 )*three*vx1); - // f_BSW = f1_BSW * csMvx + (one - csMvx) * f_BSW ;//- c1over216 * ((drho + drho1)*c1o2-((drho + drho1)*c1o2 )*three*vx1); - // /////////////////////////////////////////// - // // Y - // //f_S = f1_S * csMvy + (one - csMvy) * f_S ;//- c2over27 * ((drho + drho1)*c1o2-((drho + drho1)*c1o2 )*three*vx2); - // //f_SE = f1_SE * csMvy + (one - csMvy) * f_SE ;//- c1over54 * ((drho + drho1)*c1o2-((drho + drho1)*c1o2 )*three*vx2); - // //f_SW = f1_SW * csMvy + (one - csMvy) * f_SW ;//- c1over54 * ((drho + drho1)*c1o2-((drho + drho1)*c1o2 )*three*vx2); - // //f_TS = f1_TS * csMvy + (one - csMvy) * f_TS ;//- c1over54 * ((drho + drho1)*c1o2-((drho + drho1)*c1o2 )*three*vx2); - // //f_BS = f1_BS * csMvy + (one - csMvy) * f_BS ;//- c1over54 * ((drho + drho1)*c1o2-((drho + drho1)*c1o2 )*three*vx2); - // //f_TSE = f1_TSE * csMvy + (one - csMvy) * f_TSE ;//- c1over216 * ((drho + drho1)*c1o2-((drho + drho1)*c1o2 )*three*vx2); - // //f_TSW = f1_TSW * csMvy + (one - csMvy) * f_TSW ;//- c1over216 * ((drho + drho1)*c1o2-((drho + drho1)*c1o2 )*three*vx2); - // //f_BSE = f1_BSE * csMvy + (one - csMvy) * f_BSE ;//- c1over216 * ((drho + drho1)*c1o2-((drho + drho1)*c1o2 )*three*vx2); - // //f_BSW = f1_BSW * csMvy + (one - csMvy) * f_BSW ;//- c1over216 * ((drho + drho1)*c1o2-((drho + drho1)*c1o2 )*three*vx2); - // //f_S = f1_S * csMvy + (one - csMvy) * f_S; - // //f_SE = f1_SE * csMvy + (one - csMvy) * f_SE; - // //f_SW = f1_SW * csMvy + (one - csMvy) * f_SW; - // //f_TS = f1_TS * csMvy + (one - csMvy) * f_TS; - // //f_BS = f1_BS * csMvy + (one - csMvy) * f_BS; - // //f_TSE = 
f1_TSE * csMvy + (one - csMvy) * f_TSE; - // //f_TSW = f1_TSW * csMvy + (one - csMvy) * f_TSW; - // //f_BSE = f1_BSE * csMvy + (one - csMvy) * f_BSE; - // //f_BSW = f1_BSW * csMvy + (one - csMvy) * f_BSW; - // ////////////////////////////////////////////////////////////////////////// - //} - //else - //{ - // /////////////////////////////////////////// - // // X - // vx1 = vx1 * 0.9; - // f_W = f_E - six * c2over27 * ( vx1 ); - // f_NW = f_SE - six * c1over54 * ( vx1-vx2 ); - // f_SW = f_NE - six * c1over54 * ( vx1+vx2 ); - // f_TW = f_BE - six * c1over54 * ( vx1 -vx3); - // f_BW = f_TE - six * c1over54 * ( vx1 +vx3); - // f_TNW = f_BSE - six * c1over216 * ( vx1-vx2-vx3); - // f_TSW = f_BNE - six * c1over216 * ( vx1+vx2-vx3); - // f_BNW = f_TSE - six * c1over216 * ( vx1-vx2+vx3); - // f_BSW = f_TNE - six * c1over216 * ( vx1+vx2+vx3); - // /////////////////////////////////////////// - // // Y - // //vx2 = vx2 * 0.9; - // //f_S = f_N - six * c2over27 * ( vx2 ); - // //f_SE = f_NW - six * c1over54 * (-vx1+vx2 ); - // //f_SW = f_NE - six * c1over54 * ( vx1+vx2 ); - // //f_TS = f_BN - six * c1over54 * ( vx2-vx3); - // //f_BS = f_TN - six * c1over54 * ( vx2+vx3); - // //f_TSE = f_BNW - six * c1over216 * (-vx1+vx2-vx3); - // //f_TSW = f_BNE - six * c1over216 * ( vx1+vx2-vx3); - // //f_BSE = f_TNW - six * c1over216 * (-vx1+vx2+vx3); - // //f_BSW = f_TNE - six * c1over216 * ( vx1+vx2+vx3); - // /////////////////////////////////////////// - //} - ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + real cs = c1o1 / sqrtf(c3o1); - ////////////////////////////////////////////////////////////////////////// - if (isEvenTimestep==false) - { - D.f[DIR_P00 ] = &DD[DIR_P00 *size_Mat]; - D.f[DIR_M00 ] = &DD[DIR_M00 *size_Mat]; - D.f[DIR_0P0 ] = &DD[DIR_0P0 *size_Mat]; - D.f[DIR_0M0 ] = &DD[DIR_0M0 *size_Mat]; - D.f[DIR_00P ] = &DD[DIR_00P *size_Mat]; - D.f[DIR_00M ] = &DD[DIR_00M *size_Mat]; - D.f[DIR_PP0 
] = &DD[DIR_PP0 *size_Mat]; - D.f[DIR_MM0 ] = &DD[DIR_MM0 *size_Mat]; - D.f[DIR_PM0 ] = &DD[DIR_PM0 *size_Mat]; - D.f[DIR_MP0 ] = &DD[DIR_MP0 *size_Mat]; - D.f[DIR_P0P ] = &DD[DIR_P0P *size_Mat]; - D.f[DIR_M0M ] = &DD[DIR_M0M *size_Mat]; - D.f[DIR_P0M ] = &DD[DIR_P0M *size_Mat]; - D.f[DIR_M0P ] = &DD[DIR_M0P *size_Mat]; - D.f[DIR_0PP ] = &DD[DIR_0PP *size_Mat]; - D.f[DIR_0MM ] = &DD[DIR_0MM *size_Mat]; - D.f[DIR_0PM ] = &DD[DIR_0PM *size_Mat]; - D.f[DIR_0MP ] = &DD[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DD[DIR_000*size_Mat]; - D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat]; - D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat]; - D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat]; - D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat]; - D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat]; - D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat]; - D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat]; - D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat]; - } - else - { - D.f[DIR_M00 ] = &DD[DIR_P00 *size_Mat]; - D.f[DIR_P00 ] = &DD[DIR_M00 *size_Mat]; - D.f[DIR_0M0 ] = &DD[DIR_0P0 *size_Mat]; - D.f[DIR_0P0 ] = &DD[DIR_0M0 *size_Mat]; - D.f[DIR_00M ] = &DD[DIR_00P *size_Mat]; - D.f[DIR_00P ] = &DD[DIR_00M *size_Mat]; - D.f[DIR_MM0 ] = &DD[DIR_PP0 *size_Mat]; - D.f[DIR_PP0 ] = &DD[DIR_MM0 *size_Mat]; - D.f[DIR_MP0 ] = &DD[DIR_PM0 *size_Mat]; - D.f[DIR_PM0 ] = &DD[DIR_MP0 *size_Mat]; - D.f[DIR_M0M ] = &DD[DIR_P0P *size_Mat]; - D.f[DIR_P0P ] = &DD[DIR_M0M *size_Mat]; - D.f[DIR_M0P ] = &DD[DIR_P0M *size_Mat]; - D.f[DIR_P0M ] = &DD[DIR_M0P *size_Mat]; - D.f[DIR_0MM ] = &DD[DIR_0PP *size_Mat]; - D.f[DIR_0PP ] = &DD[DIR_0MM *size_Mat]; - D.f[DIR_0MP ] = &DD[DIR_0PM *size_Mat]; - D.f[DIR_0PM ] = &DD[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DD[DIR_000*size_Mat]; - D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat]; - D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat]; - D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat]; - D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat]; - D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat]; - D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat]; - D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat]; - D.f[DIR_MPM ] = 
&DD[DIR_PMP *size_Mat]; - } - ////////////////////////////////////////////////////////////////////////// - //__syncthreads(); - // -X - //(D.f[DIR_P00 ])[ke ] = f_E ; - //(D.f[DIR_PM0 ])[kse ] = f_SE ; - //(D.f[DIR_PP0 ])[kne ] = f_NE ; - //(D.f[DIR_P0M ])[kbe ] = f_BE ; - //(D.f[DIR_P0P ])[kte ] = f_TE ; - //(D.f[DIR_PMP ])[ktse ] = f_TSE ; - //(D.f[DIR_PPP ])[ktne ] = f_TNE ; - //(D.f[DIR_PMM ])[kbse ] = f_BSE ; - //(D.f[DIR_PPM ])[kbne ] = f_BNE ; - // X - (D.f[DIR_M00 ])[kw ] = f_W ; - (D.f[DIR_MM0 ])[ksw ] = f_SW ; - (D.f[DIR_MP0 ])[knw ] = f_NW ; - (D.f[DIR_M0M ])[kbw ] = f_BW ; - (D.f[DIR_M0P ])[ktw ] = f_TW ; - (D.f[DIR_MMP ])[ktsw ] = f_TSW ; - (D.f[DIR_MPP ])[ktnw ] = f_TNW ; - (D.f[DIR_MMM ])[kbsw ] = f_BSW ; - (D.f[DIR_MPM ])[kbnw ] = f_BNW ; - // Y - //(D.f[DIR_0M0 ])[ks ] = f_S ; - //(D.f[DIR_PM0 ])[kse ] = f_SE ; - //(D.f[DIR_MM0 ])[ksw ] = f_SW ; - //(D.f[DIR_0MP ])[kts ] = f_TS ; - //(D.f[DIR_0MM ])[kbs ] = f_BS ; - //(D.f[DIR_PMP ])[ktse ] = f_TSE ; - //(D.f[DIR_MMP ])[ktsw ] = f_TSW ; - //(D.f[DIR_PMM ])[kbse ] = f_BSE ; - //(D.f[DIR_MMM ])[kbsw ] = f_BSW ; - // Z - //(D.f[DIR_00M ])[kb ] = f_B ; - //(D.f[DIR_P0M ])[kbe ] = f_BE ; - //(D.f[DIR_M0M ])[kbw ] = f_BW ; - //(D.f[DIR_0PM ])[kbn ] = f_BN ; - //(D.f[DIR_0MM ])[kbs ] = f_BS ; - //(D.f[DIR_PPM ])[kbne ] = f_BNE ; - //(D.f[DIR_MPM ])[kbnw ] = f_BNW ; - //(D.f[DIR_PMM ])[kbse ] = f_BSE ; - //(D.f[DIR_MMM ])[kbsw ] = f_BSW ; - ////////////////////////////////////////////////////////////////////////// + ////////////////////////////////////////////////////////////////////////// + getPointersToDistributions(dist, distributions, numberOfLBnodes, !isEvenTimestep); + switch(direction) + { + case MZZ: + (dist.f[DIR_P00])[ke ] = computeOutflowDistribution(f, f1, DIR_P00, cs); + (dist.f[DIR_PM0])[kse ] = computeOutflowDistribution(f, f1, DIR_PM0, cs); + (dist.f[DIR_PP0])[kne ] = computeOutflowDistribution(f, f1, DIR_PP0, cs); + (dist.f[DIR_P0M])[kbe ] = computeOutflowDistribution(f, f1, DIR_P0M, cs); 
+ (dist.f[DIR_P0P])[kte ] = computeOutflowDistribution(f, f1, DIR_P0P, cs); + (dist.f[DIR_PMP])[ktse ] = computeOutflowDistribution(f, f1, DIR_PMP, cs); + (dist.f[DIR_PPP])[ktne ] = computeOutflowDistribution(f, f1, DIR_PPP, cs); + (dist.f[DIR_PMM])[kbse ] = computeOutflowDistribution(f, f1, DIR_PMM, cs); + (dist.f[DIR_PPM])[kbne ] = computeOutflowDistribution(f, f1, DIR_PPM, cs); + break; + + case PZZ: + (dist.f[DIR_M00])[kw ] = computeOutflowDistribution(f, f1, DIR_M00, cs); + (dist.f[DIR_MM0])[ksw ] = computeOutflowDistribution(f, f1, DIR_MM0, cs); + (dist.f[DIR_MP0])[knw ] = computeOutflowDistribution(f, f1, DIR_MP0, cs); + (dist.f[DIR_M0M])[kbw ] = computeOutflowDistribution(f, f1, DIR_M0M, cs); + (dist.f[DIR_M0P])[ktw ] = computeOutflowDistribution(f, f1, DIR_M0P, cs); + (dist.f[DIR_MMP])[ktsw ] = computeOutflowDistribution(f, f1, DIR_MMP, cs); + (dist.f[DIR_MPP])[ktnw ] = computeOutflowDistribution(f, f1, DIR_MPP, cs); + (dist.f[DIR_MMM])[kbsw ] = computeOutflowDistribution(f, f1, DIR_MMM, cs); + (dist.f[DIR_MPM])[kbnw ] = computeOutflowDistribution(f, f1, DIR_MPM, cs); + break; + + case ZMZ: + (dist.f[DIR_0P0])[kn ] = computeOutflowDistribution(f, f1, DIR_0P0, cs); + (dist.f[DIR_PP0])[kne ] = computeOutflowDistribution(f, f1, DIR_PP0, cs); + (dist.f[DIR_MP0])[knw ] = computeOutflowDistribution(f, f1, DIR_MP0, cs); + (dist.f[DIR_0PP])[ktn ] = computeOutflowDistribution(f, f1, DIR_0PP, cs); + (dist.f[DIR_0PM])[kbn ] = computeOutflowDistribution(f, f1, DIR_0PM, cs); + (dist.f[DIR_PPP])[ktne ] = computeOutflowDistribution(f, f1, DIR_PPP, cs); + (dist.f[DIR_MPP])[ktnw ] = computeOutflowDistribution(f, f1, DIR_MPP, cs); + (dist.f[DIR_PPM])[kbne ] = computeOutflowDistribution(f, f1, DIR_PPM, cs); + (dist.f[DIR_MPM])[kbnw ] = computeOutflowDistribution(f, f1, DIR_MPM, cs); + break; + + case ZPZ: + (dist.f[DIR_0M0])[ks ] = computeOutflowDistribution(f, f1, DIR_0M0, cs); + (dist.f[DIR_PM0])[kse ] = computeOutflowDistribution(f, f1, DIR_PM0, cs); + 
(dist.f[DIR_MM0])[ksw ] = computeOutflowDistribution(f, f1, DIR_MM0, cs); + (dist.f[DIR_0MP])[kts ] = computeOutflowDistribution(f, f1, DIR_0MP, cs); + (dist.f[DIR_0MM])[kbs ] = computeOutflowDistribution(f, f1, DIR_0MM, cs); + (dist.f[DIR_PMP])[ktse ] = computeOutflowDistribution(f, f1, DIR_PMP, cs); + (dist.f[DIR_MMP])[ktsw ] = computeOutflowDistribution(f, f1, DIR_MMP, cs); + (dist.f[DIR_PMM])[kbse ] = computeOutflowDistribution(f, f1, DIR_PMM, cs); + (dist.f[DIR_MMM])[kbsw ] = computeOutflowDistribution(f, f1, DIR_MMM, cs); + break; + + case ZZM: + (dist.f[DIR_00P])[kt ] = computeOutflowDistribution(f, f1, DIR_00P, cs); + (dist.f[DIR_P0P])[kte ] = computeOutflowDistribution(f, f1, DIR_P0P, cs); + (dist.f[DIR_M0P])[ktw ] = computeOutflowDistribution(f, f1, DIR_M0P, cs); + (dist.f[DIR_0PP])[ktn ] = computeOutflowDistribution(f, f1, DIR_0PP, cs); + (dist.f[DIR_0MP])[kts ] = computeOutflowDistribution(f, f1, DIR_0MP, cs); + (dist.f[DIR_PPP])[ktne ] = computeOutflowDistribution(f, f1, DIR_PPP, cs); + (dist.f[DIR_MPP])[ktnw ] = computeOutflowDistribution(f, f1, DIR_MPP, cs); + (dist.f[DIR_PMP])[ktse ] = computeOutflowDistribution(f, f1, DIR_PMP, cs); + (dist.f[DIR_MMP])[ktsw ] = computeOutflowDistribution(f, f1, DIR_MMP, cs); + break; + + case ZZP: + (dist.f[DIR_00M])[kb ] = computeOutflowDistribution(f, f1, DIR_00M, cs); + (dist.f[DIR_P0M])[kbe ] = computeOutflowDistribution(f, f1, DIR_P0M, cs); + (dist.f[DIR_M0M])[kbw ] = computeOutflowDistribution(f, f1, DIR_M0M, cs); + (dist.f[DIR_0PM])[kbn ] = computeOutflowDistribution(f, f1, DIR_0PM, cs); + (dist.f[DIR_0MM])[kbs ] = computeOutflowDistribution(f, f1, DIR_0MM, cs); + (dist.f[DIR_PPM])[kbne ] = computeOutflowDistribution(f, f1, DIR_PPM, cs); + (dist.f[DIR_MPM])[kbnw ] = computeOutflowDistribution(f, f1, DIR_MPM, cs); + (dist.f[DIR_PMM])[kbse ] = computeOutflowDistribution(f, f1, DIR_PMM, cs); + (dist.f[DIR_MMM])[kbsw ] = computeOutflowDistribution(f, f1, DIR_MMM, cs); + break; + default: + break; } } 
-//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - - - +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +__host__ __device__ real computeOutflowDistribution(const real* const &f, const real* const &f1, const int dir, const real rhoCorrection, const real cs, const real weight) +{ + return f1[dir ] * cs + (c1o1 - cs) * f[dir ] - weight *rhoCorrection; +} +__global__ void QPressZeroRhoOutflowDevice27( real* rhoBC, + real* distributions, + int* k_Q, + int* k_N, + int numberOfBCnodes, + real om1, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + unsigned int numberOfLBnodes, + bool isEvenTimestep, + int direction, + real densityCorrectionFactor) +{ + //////////////////////////////////////////////////////////////////////////////// + const unsigned k = vf::gpu::getNodeIndex(); + + ////////////////////////////////////////////////////////////////////////// + if(k>=numberOfBCnodes) return; + //////////////////////////////////////////////////////////////////////////////// + //index + unsigned int KQK = k_Q[k]; + unsigned int kzero= KQK; + unsigned int ke = KQK; + unsigned int kw = neighborX[KQK]; + unsigned int kn = KQK; + unsigned int ks = neighborY[KQK]; + unsigned int kt = KQK; + unsigned int kb = neighborZ[KQK]; + unsigned int ksw = neighborY[kw]; + unsigned int kne = KQK; + unsigned int kse = ks; + unsigned int knw = kw; + unsigned int kbw = neighborZ[kw]; + unsigned int kte = KQK; + unsigned int kbe = kb; + unsigned int ktw = kw; + unsigned int kbs = neighborZ[ks]; + unsigned int ktn = KQK; + unsigned int kbn = kb; + unsigned int kts = ks; + unsigned int ktse = ks; + unsigned int kbnw = kbw; + unsigned int ktnw = kw; + unsigned int kbse = kbs; + unsigned int ktsw = ksw; + unsigned int kbne = kb; + unsigned int ktne = KQK; + unsigned int 
kbsw = neighborZ[ksw]; + //////////////////////////////////////////////////////////////////////////////// + //index1 + unsigned int K1QK = k_N[k]; + // unsigned int k1zero= K1QK; + unsigned int k1e = K1QK; + unsigned int k1w = neighborX[K1QK]; + unsigned int k1n = K1QK; + unsigned int k1s = neighborY[K1QK]; + unsigned int k1t = K1QK; + unsigned int k1b = neighborZ[K1QK]; + unsigned int k1sw = neighborY[k1w]; + unsigned int k1ne = K1QK; + unsigned int k1se = k1s; + unsigned int k1nw = k1w; + unsigned int k1bw = neighborZ[k1w]; + unsigned int k1te = K1QK; + unsigned int k1be = k1b; + unsigned int k1tw = k1w; + unsigned int k1bs = neighborZ[k1s]; + unsigned int k1tn = K1QK; + unsigned int k1bn = k1b; + unsigned int k1ts = k1s; + unsigned int k1tse = k1s; + unsigned int k1bnw = k1bw; + unsigned int k1tnw = k1w; + unsigned int k1bse = k1bs; + unsigned int k1tsw = k1sw; + unsigned int k1bne = k1b; + unsigned int k1tne = K1QK; + unsigned int k1bsw = neighborZ[k1sw]; + //////////////////////////////////////////////////////////////////////////////// + Distributions27 dist; + getPointersToDistributions(dist, distributions, numberOfLBnodes, isEvenTimestep); + real f1[27], f[27]; + ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + f1[DIR_P00] = (dist.f[DIR_P00])[k1e ]; + f1[DIR_M00] = (dist.f[DIR_M00])[k1w ]; + f1[DIR_0P0] = (dist.f[DIR_0P0])[k1n ]; + f1[DIR_0M0] = (dist.f[DIR_0M0])[k1s ]; + f1[DIR_00P] = (dist.f[DIR_00P])[k1t ]; + f1[DIR_00M] = (dist.f[DIR_00M])[k1b ]; + f1[DIR_PP0] = (dist.f[DIR_PP0])[k1ne ]; + f1[DIR_MM0] = (dist.f[DIR_MM0])[k1sw ]; + f1[DIR_PM0] = (dist.f[DIR_PM0])[k1se ]; + f1[DIR_MP0] = (dist.f[DIR_MP0])[k1nw ]; + f1[DIR_P0P] = (dist.f[DIR_P0P])[k1te ]; + f1[DIR_M0M] = (dist.f[DIR_M0M])[k1bw ]; + f1[DIR_P0M] = (dist.f[DIR_P0M])[k1be ]; + f1[DIR_M0P] = (dist.f[DIR_M0P])[k1tw ]; + f1[DIR_0PP] = (dist.f[DIR_0PP])[k1tn ]; + f1[DIR_0MM] = 
(dist.f[DIR_0MM])[k1bs ]; + f1[DIR_0PM] = (dist.f[DIR_0PM])[k1bn ]; + f1[DIR_0MP] = (dist.f[DIR_0MP])[k1ts ]; + // f1[DIR_000] = (dist.f[DIR_000])[k1zero]; + f1[DIR_PPP] = (dist.f[DIR_PPP])[k1tne ]; + f1[DIR_MMP] = (dist.f[DIR_MMP])[k1tsw ]; + f1[DIR_PMP] = (dist.f[DIR_PMP])[k1tse ]; + f1[DIR_MPP] = (dist.f[DIR_MPP])[k1tnw ]; + f1[DIR_PPM] = (dist.f[DIR_PPM])[k1bne ]; + f1[DIR_MMM] = (dist.f[DIR_MMM])[k1bsw ]; + f1[DIR_PMM] = (dist.f[DIR_PMM])[k1bse ]; + f1[DIR_MPM] = (dist.f[DIR_MPM])[k1bnw ]; + ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + f[DIR_P00] = (dist.f[DIR_P00])[ke ]; + f[DIR_M00] = (dist.f[DIR_M00])[kw ]; + f[DIR_0P0] = (dist.f[DIR_0P0])[kn ]; + f[DIR_0M0] = (dist.f[DIR_0M0])[ks ]; + f[DIR_00P] = (dist.f[DIR_00P])[kt ]; + f[DIR_00M] = (dist.f[DIR_00M])[kb ]; + f[DIR_PP0] = (dist.f[DIR_PP0])[kne ]; + f[DIR_MM0] = (dist.f[DIR_MM0])[ksw ]; + f[DIR_PM0] = (dist.f[DIR_PM0])[kse ]; + f[DIR_MP0] = (dist.f[DIR_MP0])[knw ]; + f[DIR_P0P] = (dist.f[DIR_P0P])[kte ]; + f[DIR_M0M] = (dist.f[DIR_M0M])[kbw ]; + f[DIR_P0M] = (dist.f[DIR_P0M])[kbe ]; + f[DIR_M0P] = (dist.f[DIR_M0P])[ktw ]; + f[DIR_0PP] = (dist.f[DIR_0PP])[ktn ]; + f[DIR_0MM] = (dist.f[DIR_0MM])[kbs ]; + f[DIR_0PM] = (dist.f[DIR_0PM])[kbn ]; + f[DIR_0MP] = (dist.f[DIR_0MP])[kts ]; + f[DIR_000] = (dist.f[DIR_000])[kzero]; + f[DIR_PPP] = (dist.f[DIR_PPP])[ktne ]; + f[DIR_MMP] = (dist.f[DIR_MMP])[ktsw ]; + f[DIR_PMP] = (dist.f[DIR_PMP])[ktse ]; + f[DIR_MPP] = (dist.f[DIR_MPP])[ktnw ]; + f[DIR_PPM] = (dist.f[DIR_PPM])[kbne ]; + f[DIR_MMM] = (dist.f[DIR_MMM])[kbsw ]; + f[DIR_PMM] = (dist.f[DIR_PMM])[kbse ]; + f[DIR_MPM] = (dist.f[DIR_MPM])[kbnw ]; + ////////////////////////////////////////////////////////////////////////// + real drho = vf::lbm::getDensity(f); + + real rhoCorrection = densityCorrectionFactor*drho; + + real cs = c1o1 / sqrtf(c3o1); + getPointersToDistributions(dist, distributions, 
numberOfLBnodes, !isEvenTimestep); + switch(direction) + { + case MZZ: + (dist.f[DIR_P00])[ke ] = computeOutflowDistribution(f, f1, DIR_P00 , rhoCorrection, cs, c2o27); + (dist.f[DIR_PM0])[kse ] = computeOutflowDistribution(f, f1, DIR_PM0, rhoCorrection, cs, c1o54); + (dist.f[DIR_PP0])[kne ] = computeOutflowDistribution(f, f1, DIR_PP0, rhoCorrection, cs, c1o54); + (dist.f[DIR_P0M])[kbe ] = computeOutflowDistribution(f, f1, DIR_P0M, rhoCorrection, cs, c1o54); + (dist.f[DIR_P0P])[kte ] = computeOutflowDistribution(f, f1, DIR_P0P, rhoCorrection, cs, c1o54); + (dist.f[DIR_PMP])[ktse ] = computeOutflowDistribution(f, f1, DIR_PMP, rhoCorrection, cs, c1o216); + (dist.f[DIR_PPP])[ktne ] = computeOutflowDistribution(f, f1, DIR_PPP, rhoCorrection, cs, c1o216); + (dist.f[DIR_PMM])[kbse ] = computeOutflowDistribution(f, f1, DIR_PMM, rhoCorrection, cs, c1o216); + (dist.f[DIR_PPM])[kbne ] = computeOutflowDistribution(f, f1, DIR_PPM, rhoCorrection, cs, c1o216); + break; + + case PZZ: + (dist.f[DIR_M00])[kw ] = computeOutflowDistribution(f, f1, DIR_M00, rhoCorrection, cs, c2o27); + (dist.f[DIR_MM0])[ksw ] = computeOutflowDistribution(f, f1, DIR_MM0, rhoCorrection, cs, c1o54); + (dist.f[DIR_MP0])[knw ] = computeOutflowDistribution(f, f1, DIR_MP0, rhoCorrection, cs, c1o54); + (dist.f[DIR_M0M])[kbw ] = computeOutflowDistribution(f, f1, DIR_M0M, rhoCorrection, cs, c1o54); + (dist.f[DIR_M0P])[ktw ] = computeOutflowDistribution(f, f1, DIR_M0P, rhoCorrection, cs, c1o54); + (dist.f[DIR_MMP])[ktsw ] = computeOutflowDistribution(f, f1, DIR_MMP, rhoCorrection, cs, c1o216); + (dist.f[DIR_MPP])[ktnw ] = computeOutflowDistribution(f, f1, DIR_MPP, rhoCorrection, cs, c1o216); + (dist.f[DIR_MMM])[kbsw ] = computeOutflowDistribution(f, f1, DIR_MMM, rhoCorrection, cs, c1o216); + (dist.f[DIR_MPM])[kbnw ] = computeOutflowDistribution(f, f1, DIR_MPM, rhoCorrection, cs, c1o216); + break; + + case ZMZ: + (dist.f[DIR_0P0])[kn ] = computeOutflowDistribution(f, f1, DIR_0P0, rhoCorrection, cs, c2o27); + 
(dist.f[DIR_PP0])[kne ] = computeOutflowDistribution(f, f1, DIR_PP0, rhoCorrection, cs, c1o54); + (dist.f[DIR_MP0])[knw ] = computeOutflowDistribution(f, f1, DIR_MP0, rhoCorrection, cs, c1o54); + (dist.f[DIR_0PP])[ktn ] = computeOutflowDistribution(f, f1, DIR_0PP, rhoCorrection, cs, c1o54); + (dist.f[DIR_0PM])[kbn ] = computeOutflowDistribution(f, f1, DIR_0PM, rhoCorrection, cs, c1o54); + (dist.f[DIR_PPP])[ktne ] = computeOutflowDistribution(f, f1, DIR_PPP, rhoCorrection, cs, c1o216); + (dist.f[DIR_MPP])[ktnw ] = computeOutflowDistribution(f, f1, DIR_MPP, rhoCorrection, cs, c1o216); + (dist.f[DIR_PPM])[kbne ] = computeOutflowDistribution(f, f1, DIR_PPM, rhoCorrection, cs, c1o216); + (dist.f[DIR_MPM])[kbnw ] = computeOutflowDistribution(f, f1, DIR_MPM, rhoCorrection, cs, c1o216); + break; + + case ZPZ: + (dist.f[DIR_0M0])[ks ] =computeOutflowDistribution(f, f1, DIR_0M0, rhoCorrection, cs, c2o27); + (dist.f[DIR_PM0])[kse ] =computeOutflowDistribution(f, f1, DIR_PM0, rhoCorrection, cs, c1o54); + (dist.f[DIR_MM0])[ksw ] =computeOutflowDistribution(f, f1, DIR_MM0, rhoCorrection, cs, c1o54); + (dist.f[DIR_0MP])[kts ] =computeOutflowDistribution(f, f1, DIR_0MP, rhoCorrection, cs, c1o54); + (dist.f[DIR_0MM])[kbs ] =computeOutflowDistribution(f, f1, DIR_0MM, rhoCorrection, cs, c1o54); + (dist.f[DIR_PMP])[ktse ] =computeOutflowDistribution(f, f1, DIR_PMP, rhoCorrection, cs, c1o216); + (dist.f[DIR_MMP])[ktsw ] =computeOutflowDistribution(f, f1, DIR_MMP, rhoCorrection, cs, c1o216); + (dist.f[DIR_PMM])[kbse ] =computeOutflowDistribution(f, f1, DIR_PMM, rhoCorrection, cs, c1o216); + (dist.f[DIR_MMM])[kbsw ] =computeOutflowDistribution(f, f1, DIR_MMM, rhoCorrection, cs, c1o216); + break; + + case ZZM: + (dist.f[DIR_00P])[kt ] = computeOutflowDistribution(f, f1, DIR_00P, rhoCorrection, cs, c2o27); + (dist.f[DIR_P0P])[kte ] = computeOutflowDistribution(f, f1, DIR_P0P, rhoCorrection, cs, c1o54); + (dist.f[DIR_M0P])[ktw ] = computeOutflowDistribution(f, f1, DIR_M0P, rhoCorrection, 
cs, c1o54); + (dist.f[DIR_0PP])[ktn ] = computeOutflowDistribution(f, f1, DIR_0PP, rhoCorrection, cs, c1o54); + (dist.f[DIR_0MP])[kts ] = computeOutflowDistribution(f, f1, DIR_0MP, rhoCorrection, cs, c1o54); + (dist.f[DIR_PPP])[ktne ] = computeOutflowDistribution(f, f1, DIR_PPP, rhoCorrection, cs, c1o216); + (dist.f[DIR_MPP])[ktnw ] = computeOutflowDistribution(f, f1, DIR_MPP, rhoCorrection, cs, c1o216); + (dist.f[DIR_PMP])[ktse ] = computeOutflowDistribution(f, f1, DIR_PMP, rhoCorrection, cs, c1o216); + (dist.f[DIR_MMP])[ktsw ] = computeOutflowDistribution(f, f1, DIR_MMP, rhoCorrection, cs, c1o216); + break; + + case ZZP: + (dist.f[DIR_00M])[kb ] = computeOutflowDistribution(f, f1, DIR_00M, rhoCorrection, cs, c2o27); + (dist.f[DIR_P0M])[kbe ] = computeOutflowDistribution(f, f1, DIR_P0M, rhoCorrection, cs, c1o54); + (dist.f[DIR_M0M])[kbw ] = computeOutflowDistribution(f, f1, DIR_M0M, rhoCorrection, cs, c1o54); + (dist.f[DIR_0PM])[kbn ] = computeOutflowDistribution(f, f1, DIR_0PM, rhoCorrection, cs, c1o54); + (dist.f[DIR_0MM])[kbs ] = computeOutflowDistribution(f, f1, DIR_0MM, rhoCorrection, cs, c1o54); + (dist.f[DIR_PPM])[kbne ] = computeOutflowDistribution(f, f1, DIR_PPM, rhoCorrection, cs, c1o216); + (dist.f[DIR_MPM])[kbnw ] = computeOutflowDistribution(f, f1, DIR_MPM, rhoCorrection, cs, c1o216); + (dist.f[DIR_PMM])[kbse ] = computeOutflowDistribution(f, f1, DIR_PMM, rhoCorrection, cs, c1o216); + (dist.f[DIR_MMM])[kbsw ] = computeOutflowDistribution(f, f1, DIR_MMM, rhoCorrection, cs, c1o216); + break; + default: + break; + } +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/src/gpu/VirtualFluids_GPU/GPU/TurbulentViscosityKernels.cu b/src/gpu/VirtualFluids_GPU/GPU/TurbulentViscosityKernels.cu index 3719ca3712e6f63a77f62bf314af7d19eea01f4c..a8f02fee717caf7f67624243b873fe993b5c7927 100644 --- a/src/gpu/VirtualFluids_GPU/GPU/TurbulentViscosityKernels.cu 
+++ b/src/gpu/VirtualFluids_GPU/GPU/TurbulentViscosityKernels.cu @@ -38,6 +38,7 @@ #include <cuda_runtime.h> #include <helper_cuda.h> #include "LBM/LB.h" +#include "Kernel/Utilities/DistributionHelper.cuh" using namespace vf::lbm::constant; @@ -64,15 +65,7 @@ __global__ void calcAMD(real* vx, uint size_Mat, real SGSConstant) { - - const uint x = threadIdx.x; - const uint y = blockIdx.x; - const uint z = blockIdx.y; - - const uint nx = blockDim.x; - const uint ny = gridDim.x; - - const uint k = nx*(ny*z + y) + x; + const uint k = vf::gpu::getNodeIndex(); if(k >= size_Mat) return; if(typeOfGridNode[k] != GEO_FLUID) return; @@ -102,7 +95,7 @@ __global__ void calcAMD(real* vx, (dvxdx*dvzdx + dvxdy*dvzdy + dvxdz*dvzdz) * (dvxdz+dvzdx) + (dvydx*dvzdx + dvydy*dvzdy + dvydz*dvzdz) * (dvydz+dvzdy); - turbulentViscosity[k] = max(c0o1,-SGSConstant*enumerator)/denominator; + turbulentViscosity[k] = denominator != c0o1 ? max(c0o1,-SGSConstant*enumerator)/denominator : c0o1; } void calcTurbulentViscosityAMD(Parameter* para, int level) diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/TurbulentViscosityKernels/FluidFlow/Compressible/CumulantK17chim/TurbulentViscosityCumulantK17CompChim.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/TurbulentViscosityKernels/FluidFlow/Compressible/CumulantK17chim/TurbulentViscosityCumulantK17CompChim.cu index a9d518d14a286ae3f6b565176969162994afa269..8c5ba40baba928a627c375f32d8df914eec4fdb8 100644 --- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/TurbulentViscosityKernels/FluidFlow/Compressible/CumulantK17chim/TurbulentViscosityCumulantK17CompChim.cu +++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/TurbulentViscosityKernels/FluidFlow/Compressible/CumulantK17chim/TurbulentViscosityCumulantK17CompChim.cu @@ -16,7 +16,8 @@ void TurbulentViscosityCumulantK17CompChim<turbulenceModel>::run() vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(para->getParH(level)->numberofthreads, para->getParH(level)->numberOfNodes); 
LB_Kernel_TurbulentViscosityCumulantK17CompChim < turbulenceModel > <<< grid.grid, grid.threads >>>( para->getParD(level)->omega, - para->getParD(level)->typeOfGridNode, para->getParD(level)->neighborX, + para->getParD(level)->typeOfGridNode, + para->getParD(level)->neighborX, para->getParD(level)->neighborY, para->getParD(level)->neighborZ, para->getParD(level)->distributions.f[0], diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/TurbulentViscosityKernels/FluidFlow/Compressible/CumulantK17chim/TurbulentViscosityCumulantK17CompChim_Device.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/TurbulentViscosityKernels/FluidFlow/Compressible/CumulantK17chim/TurbulentViscosityCumulantK17CompChim_Device.cu index 32350b95107b68103af0f238fefe095882919092..63ca7d0673432ebef35e8e6deaaef6cf9f2cf0d4 100644 --- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/TurbulentViscosityKernels/FluidFlow/Compressible/CumulantK17chim/TurbulentViscosityCumulantK17CompChim_Device.cu +++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/TurbulentViscosityKernels/FluidFlow/Compressible/CumulantK17chim/TurbulentViscosityCumulantK17CompChim_Device.cu @@ -29,7 +29,7 @@ //! \file TurbulentViscosityCumulantK17CompChim_Device.cu //! \author Henry Korb, Henrik Asmuth //! \date 16/05/2022 -//! \brief CumulantK17CompChim kernel by Martin Schönherr that inlcudes turbulent viscosity and other small mods. +//! \brief CumulantK17CompChim kernel by Martin Schönherr that includes turbulent viscosity and other small mods. //! //! Additions to CumulantK17CompChim: //! 
- can incorporate local body force @@ -43,6 +43,8 @@ #include "lbm/constants/D3Q27.h" #include <lbm/constants/NumericConstants.h> #include "Kernel/Utilities/DistributionHelper.cuh" +#include "VirtualFluids_GPU/GPU/KernelUtilities.h" +#include "Kernel/ChimeraTransformation.h" #include "GPU/TurbulentViscosityInlines.cuh" @@ -66,7 +68,7 @@ __global__ void LB_Kernel_TurbulentViscosityCumulantK17CompChim( real* vz, real* turbulentViscosity, real SGSconstant, - unsigned long size_Mat, + unsigned long numberOfLBnodes, int level, bool bodyForce, real* forces, @@ -91,14 +93,14 @@ __global__ void LB_Kernel_TurbulentViscosityCumulantK17CompChim( ////////////////////////////////////////////////////////////////////////// // run for all indices in size_Mat and fluid nodes - if ((k_000 < size_Mat) && (typeOfGridNode[k_000] == GEO_FLUID)) { + if ((k_000 < numberOfLBnodes) && (typeOfGridNode[k_000] == GEO_FLUID)) { ////////////////////////////////////////////////////////////////////////// //! - Read distributions: style of reading and writing the distributions from/to stored arrays dependent on //! timestep is based on the esoteric twist algorithm \ref <a //! href="https://doi.org/10.3390/computation5020019"><b>[ M. Geier et al. (2017), //! DOI:10.3390/computation5020019 ]</b></a> //! - Distributions27 dist = vf::gpu::getDistributionReferences27(distributions, size_Mat, isEvenTimestep); + Distributions27 dist = vf::gpu::getDistributionReferences27(distributions, numberOfLBnodes, isEvenTimestep); //////////////////////////////////////////////////////////////////////////////// //! - Set neighbor indices (necessary for indirect addressing) @@ -200,9 +202,9 @@ __global__ void LB_Kernel_TurbulentViscosityCumulantK17CompChim( //! DOI:10.1016/j.camwa.2015.05.001 ]</b></a> //! 
real factor = c1o1; - for (size_t i = 1; i <= level; i++) { + for (size_t i = 1; i <= level; i++){ factor *= c2o1; - } + } real fx = forces[0]; real fy = forces[1]; @@ -680,8 +682,8 @@ __global__ void LB_Kernel_TurbulentViscosityCumulantK17CompChim( } } -template __global__ void LB_Kernel_TurbulentViscosityCumulantK17CompChim < TurbulenceModel::AMD > ( real omega_in, uint* typeOfGridNode, uint* neighborX, uint* neighborY, uint* neighborZ, real* distributions, real* rho, real* vx, real* vy, real* vz, real* turbulentViscosity, real SGSconstant, unsigned long size_Mat, int level, bool bodyForce, real* forces, real* bodyForceX, real* bodyForceY, real* bodyForceZ, real* quadricLimiters, bool isEvenTimestep); +template __global__ void LB_Kernel_TurbulentViscosityCumulantK17CompChim < TurbulenceModel::AMD > ( real omega_in, uint* typeOfGridNode, uint* neighborX, uint* neighborY, uint* neighborZ, real* distributions, real* rho, real* vx, real* vy, real* vz, real* turbulentViscosity, real SGSconstant, unsigned long numberOfLBnodes, int level, bool bodyForce, real* forces, real* bodyForceX, real* bodyForceY, real* bodyForceZ, real* quadricLimiters, bool isEvenTimestep); -template __global__ void LB_Kernel_TurbulentViscosityCumulantK17CompChim < TurbulenceModel::Smagorinsky > ( real omega_in, uint* typeOfGridNode, uint* neighborX, uint* neighborY, uint* neighborZ, real* distributions, real* rho, real* vx, real* vy, real* vz, real* turbulentViscosity, real SGSconstant, unsigned long size_Mat, int level, bool bodyForce, real* forces, real* bodyForceX, real* bodyForceY, real* bodyForceZ, real* quadricLimiters, bool isEvenTimestep); +template __global__ void LB_Kernel_TurbulentViscosityCumulantK17CompChim < TurbulenceModel::Smagorinsky > ( real omega_in, uint* typeOfGridNode, uint* neighborX, uint* neighborY, uint* neighborZ, real* distributions, real* rho, real* vx, real* vy, real* vz, real* turbulentViscosity, real SGSconstant, unsigned long numberOfLBnodes, int level, bool 
bodyForce, real* forces, real* bodyForceX, real* bodyForceY, real* bodyForceZ, real* quadricLimiters, bool isEvenTimestep); template __global__ void LB_Kernel_TurbulentViscosityCumulantK17CompChim < TurbulenceModel::QR > ( real omega_in, uint* typeOfGridNode, uint* neighborX, uint* neighborY, uint* neighborZ, real* distributions, real* rho, real* vx, real* vy, real* vz, real* turbulentViscosity, real SGSconstant, unsigned long size_Mat, int level, bool bodyForce, real* forces, real* bodyForceX, real* bodyForceY, real* bodyForceZ, real* quadricLimiters, bool isEvenTimestep); diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/TurbulentViscosityKernels/FluidFlow/Compressible/CumulantK17chim/TurbulentViscosityCumulantK17CompChim_Device.cuh b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/TurbulentViscosityKernels/FluidFlow/Compressible/CumulantK17chim/TurbulentViscosityCumulantK17CompChim_Device.cuh index 5ef37557399f263d25edf03b02b00f6a03c6e1cb..6af4e0a85f1a242ff13d148a2aaecc89c5240308 100644 --- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/TurbulentViscosityKernels/FluidFlow/Compressible/CumulantK17chim/TurbulentViscosityCumulantK17CompChim_Device.cuh +++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/TurbulentViscosityKernels/FluidFlow/Compressible/CumulantK17chim/TurbulentViscosityCumulantK17CompChim_Device.cuh @@ -17,7 +17,7 @@ template< TurbulenceModel turbulenceModel > __global__ void LB_Kernel_TurbulentV real* vz, real* turbulentViscosity, real SGSconstant, - unsigned long size_Mat, + unsigned long numberOfLBnodes, int level, bool bodyForce, real* forces, diff --git a/src/gpu/VirtualFluids_GPU/KernelManager/BCKernelManager.cpp b/src/gpu/VirtualFluids_GPU/KernelManager/BCKernelManager.cpp index cc945ea225a28c58dca4ceefdb80fffb76228b21..eae5c5f9965323a8debb62789c931a70ae462a56 100644 --- a/src/gpu/VirtualFluids_GPU/KernelManager/BCKernelManager.cpp +++ b/src/gpu/VirtualFluids_GPU/KernelManager/BCKernelManager.cpp @@ -38,6 +38,7 @@ #include "BCKernelManager.h" #include 
"Factories/BoundaryConditionFactory.h" +#include "GridGenerator/VelocitySetter/VelocitySetter.h" #include "Calculation/Cp.h" #include "Calculation/DragLift.h" #include "GPU/GPU_Interface.h" @@ -51,6 +52,7 @@ BCKernelManager::BCKernelManager(SPtr<Parameter> parameter, BoundaryConditionFac this->pressureBoundaryConditionPre = bcFactory->getPressureBoundaryConditionPre(); this->geometryBoundaryConditionPost = bcFactory->getGeometryBoundaryConditionPost(); this->stressBoundaryConditionPost = bcFactory->getStressBoundaryConditionPost(); + this->precursorBoundaryConditionPost = bcFactory->getPrecursorBoundaryConditionPost(); checkBoundaryCondition(this->velocityBoundaryConditionPost, this->para->getParD(0)->velocityBC, "velocityBoundaryConditionPost"); @@ -64,6 +66,8 @@ BCKernelManager::BCKernelManager(SPtr<Parameter> parameter, BoundaryConditionFac "geometryBoundaryConditionPost"); checkBoundaryCondition(this->stressBoundaryConditionPost, this->para->getParD(0)->stressBC, "stressBoundaryConditionPost"); + checkBoundaryCondition(this->precursorBoundaryConditionPost, this->para->getParD(0)->precursorBC, + "precursorBoundaryConditionPost"); } void BCKernelManager::runVelocityBCKernelPre(const int level) const @@ -387,3 +391,75 @@ void BCKernelManager::runNoSlipBCKernelPost(const int level) const{ noSlipBoundaryConditionPost(para->getParD(level).get(), &(para->getParD(level)->noSlipBC)); } } + +// void LBKernelManager::calculateMacroscopicValues(const int level) const +// { +// if (para->getIsADcalculationOn()) { +// CalcMacADCompSP27( +// para->getParD()->velocityX, +// para->getParD()->velocityY, +// para->getParD()->velocityZ, +// para->getParD()->rho, +// para->getParD()->pressure, +// para->getParD()->typeOfGridNode, +// para->getParD()->neighborX, +// para->getParD()->neighborY, +// para->getParD()->neighborZ, +// para->getParD()->numberOfNodes, +// para->getParD()->numberofthreads, +// para->getParD()->distributions.f[0], +// para->getParD()->distributionsAD.f[0], 
+// para->getParD()->forcing, +// para->getParD()->isEvenTimestep); +// } else { +// CalcMacCompSP27( +// para->getParD()->velocityX, +// para->getParD()->velocityY, +// para->getParD()->velocityZ, +// para->getParD()->rho, +// para->getParD()->pressure, +// para->getParD()->typeOfGridNode, +// para->getParD()->neighborX, +// para->getParD()->neighborY, +// para->getParD()->neighborZ, +// para->getParD()->numberOfNodes, +// para->getParD()->numberofthreads, +// para->getParD()->distributions.f[0], +// para->getParD()->isEvenTimestep); +// } +// } + +void BCKernelManager::runPrecursorBCKernelPost(int level, uint t, CudaMemoryManager* cudaMemoryManager) +{ + if(para->getParH(level)->precursorBC.numberOfBCnodes == 0) return; + + uint lastTime = (para->getParD(level)->precursorBC.nPrecursorReads-2)*para->getParD(level)->precursorBC.nTRead; // timestep currently loaded into last arrays + uint currentTime = (para->getParD(level)->precursorBC.nPrecursorReads-1)*para->getParD(level)->precursorBC.nTRead; // timestep currently loaded into current arrays + uint nextTime = para->getParD(level)->precursorBC.nPrecursorReads *para->getParD(level)->precursorBC.nTRead; // timestep currently loaded into next arrays + + if(t>=currentTime) + { + //cycle time + lastTime = currentTime; + currentTime = nextTime; + nextTime += para->getParD(level)->precursorBC.nTRead; + + //cycle pointers + real* tmp = para->getParD(level)->precursorBC.last; + para->getParD(level)->precursorBC.last = para->getParD(level)->precursorBC.current; + para->getParD(level)->precursorBC.current = para->getParD(level)->precursorBC.next; + para->getParD(level)->precursorBC.next = tmp; + + real loadTime = nextTime*pow(2,-level)*para->getTimeRatio(); + for(auto reader : para->getParH(level)->velocityReader) + { + reader->getNextData(para->getParH(level)->precursorBC.next, para->getParH(level)->precursorBC.numberOfPrecursorNodes, loadTime); + } + cudaMemoryManager->cudaCopyPrecursorData(level); + 
para->getParD(level)->precursorBC.nPrecursorReads++; + para->getParH(level)->precursorBC.nPrecursorReads++; + } + + real tRatio = real(t-lastTime)/para->getParD(level)->precursorBC.nTRead; + precursorBoundaryConditionPost(para->getParD(level).get(), &para->getParD(level)->precursorBC, tRatio, para->getVelocityRatio()); +} \ No newline at end of file diff --git a/src/gpu/VirtualFluids_GPU/KernelManager/BCKernelManager.h b/src/gpu/VirtualFluids_GPU/KernelManager/BCKernelManager.h index 423a9cc9056281a3a2a135ae32fa26cc47f93967..a2987e9b40900d019f95dc0fa839beb775f522ef 100644 --- a/src/gpu/VirtualFluids_GPU/KernelManager/BCKernelManager.h +++ b/src/gpu/VirtualFluids_GPU/KernelManager/BCKernelManager.h @@ -48,6 +48,7 @@ struct LBMSimulationParameter; using boundaryCondition = std::function<void(LBMSimulationParameter *, QforBoundaryConditions *)>; using boundaryConditionWithParameter = std::function<void(Parameter *, QforBoundaryConditions *, const int level)>; +using precursorBoundaryCondition = std::function<void(LBMSimulationParameter *, QforPrecursorBoundaryConditions *, real tRatio, real velocityRatio)>; //! \class BCKernelManager //! \brief manage the cuda kernel calls to boundary conditions @@ -84,7 +85,10 @@ public: //! \brief calls the device function of the pressure boundary condition (post-collision) void runPressureBCKernelPost(const int level) const; - //! \brief calls the device function of the outflow boundary condition (pre-collision) + //! \brief calls the device function of the precursor boundary condition + void runPrecursorBCKernelPost(int level, uint t, CudaMemoryManager* cudaMemoryManager); + + //! \brief calls the device function of the outflow boundary condition void runOutflowBCKernelPre(const int level) const; //! \brief calls the device function of the stress wall model (post-collision) @@ -96,13 +100,16 @@ private: //! \param boundaryCondition: a kernel function for the boundary condition //!
\param bcStruct: a struct containing the grid nodes which are part of the boundary condition //! \param bcName: the name of the checked boundary condition - template <typename bcFunction> - void checkBoundaryCondition(const bcFunction &boundaryCondition, const QforBoundaryConditions &bcStruct, const std::string &bcName) + template <typename bcFunction, typename QforBC> + void checkBoundaryCondition(const bcFunction &boundaryCondition, const QforBC &bcStruct, const std::string &bcName) { if (!boundaryCondition && bcStruct.numberOfBCnodes > 0) throw std::runtime_error("The boundary condition " + bcName + " was not set!"); } + void runDistributionPrecursorBCKernelPost(int level, uint t, CudaMemoryManager* cudaMemoryManager); + void runVelocityPrecursorBCKernelPost(int level, uint t, CudaMemoryManager* cudaMemoryManager); + SPtr<Parameter> para; boundaryCondition velocityBoundaryConditionPost = nullptr; @@ -111,5 +118,6 @@ private: boundaryCondition pressureBoundaryConditionPre = nullptr; boundaryCondition geometryBoundaryConditionPost = nullptr; boundaryConditionWithParameter stressBoundaryConditionPost = nullptr; + precursorBoundaryCondition precursorBoundaryConditionPost = nullptr; }; #endif diff --git a/src/gpu/VirtualFluids_GPU/KernelManager/BCKernelManagerTest.cpp b/src/gpu/VirtualFluids_GPU/KernelManager/BCKernelManagerTest.cpp index d55fa51bd8a225dd4e89e684bc81cd56f3f450c0..a0e02112e821eedcfeb013d3465529f668309529 100644 --- a/src/gpu/VirtualFluids_GPU/KernelManager/BCKernelManagerTest.cpp +++ b/src/gpu/VirtualFluids_GPU/KernelManager/BCKernelManagerTest.cpp @@ -53,3 +53,9 @@ TEST_F(BCKernelManagerTest_BCsNotSpecified, stressBoundaryConditionPost_NotSpeci para->getParD(0)->stressBC.numberOfBCnodes = 1; EXPECT_THROW(BCKernelManager(para, &bcFactory), std::runtime_error); } + +TEST_F(BCKernelManagerTest_BCsNotSpecified, precursorBoundaryConditionPost_NotSpecified) +{ + para->getParD(0)->precursorBC.numberOfBCnodes = 1; + EXPECT_THROW(BCKernelManager(para, 
&bcFactory), std::runtime_error); +} diff --git a/src/gpu/VirtualFluids_GPU/LBM/LB.h b/src/gpu/VirtualFluids_GPU/LBM/LB.h index eea4adfda3c1ef0862f39ef58fc6e065af7bab1b..e9831253923a90dc2daf3e509fc13c01de55d142 100644 --- a/src/gpu/VirtualFluids_GPU/LBM/LB.h +++ b/src/gpu/VirtualFluids_GPU/LBM/LB.h @@ -46,6 +46,7 @@ #include "Core/DataTypes.h" +#include <cuda_runtime.h> #include <string> #include <vector> @@ -144,6 +145,7 @@ struct InitCondition bool hasWallModelMonitor {false}; bool simulatePorousMedia {false}; bool streetVelocityFile {false}; + real outflowPressureCorrectionFactor {0.0}; }; //Interface Cells @@ -214,6 +216,22 @@ typedef struct QforBC{ real *normalX, *normalY, *normalZ; }QforBoundaryConditions; +typedef struct QforPrecursorBC{ + int* k; + int numberOfBCnodes=0; + int sizeQ; + int numberOfPrecursorNodes=0; + uint nPrecursorReads=0; + uint nTRead; + size_t numberOfQuantities; + real* q27[27]; + uint* planeNeighborNT, *planeNeighborNB, *planeNeighborST, *planeNeighborSB; + real* weightsNT, *weightsNB, *weightsST, *weightsSB; + real* last, *current, *next; + real velocityX, velocityY, velocityZ; + cudaStream_t stream; +}QforPrecursorBoundaryConditions; + //BCTemp typedef struct TempforBC{ int* k; diff --git a/src/gpu/VirtualFluids_GPU/Parameter/EdgeNodeFinderTest.cpp b/src/gpu/VirtualFluids_GPU/Parameter/EdgeNodeFinderTest.cpp index c63c1620ae368cdb31ed582814b472b4695114bf..8e9919e3f583abe5b77163485924606646a8ec22 100644 --- a/src/gpu/VirtualFluids_GPU/Parameter/EdgeNodeFinderTest.cpp +++ b/src/gpu/VirtualFluids_GPU/Parameter/EdgeNodeFinderTest.cpp @@ -70,13 +70,13 @@ TEST_F(EdgeNodeFinderTest_findEdgeNodes, shouldReturnCorrectVectorForXY) vf::gpu::findEdgeNodesCommMultiGPU(*para); - const std::vector<std::pair<int, int>> expectedEdgeNodesXtoYRecv = { std::pair(numRecvNeighbor, 0), - std::pair(numRecvNeighbor, 4), - std::pair(numRecvNeighbor, 5) }; + const std::vector<std::pair<int, int>> expectedEdgeNodesXtoYRecv = { std::pair<int, 
int>(numRecvNeighbor, 0), + std::pair<int, int>(numRecvNeighbor, 4), + std::pair<int, int>(numRecvNeighbor, 5) }; - const std::vector<std::pair<int, int>> expectedEdgeNodesXtoYSend = { std::pair(numSendNeighbor, 1), - std::pair(numSendNeighbor, 6), - std::pair(numSendNeighbor, 4) }; + const std::vector<std::pair<int, int>> expectedEdgeNodesXtoYSend = { std::pair<int, int>(numSendNeighbor, 1), + std::pair<int, int>(numSendNeighbor, 6), + std::pair<int, int>(numSendNeighbor, 4) }; EXPECT_THAT(para->parH[level]->edgeNodesXtoY.size(), testing::Eq(expectedEdgeNodesXtoYRecv.size())); EXPECT_TRUE(compareEdgeNodesRecv(para->parH[level]->edgeNodesXtoY, expectedEdgeNodesXtoYRecv)) @@ -107,12 +107,12 @@ TEST_F(EdgeNodeFinderTest_findEdgeNodes, shouldReturnCorrectVectorForXZ) vf::gpu::findEdgeNodesCommMultiGPU(*para); - const std::vector<std::pair<int, int>> expectedEdgeNodesXtoZRecv = { std::pair(numRecvNeighbor, 1), - std::pair(numRecvNeighbor, 4), - std::pair(numRecvNeighbor, 6) }; - const std::vector<std::pair<int, int>> expectedEdgeNodesXtoZSend = { std::pair(numSendNeighbor, 0), - std::pair(numSendNeighbor, 5), - std::pair(numSendNeighbor, 4) }; + const std::vector<std::pair<int, int>> expectedEdgeNodesXtoZRecv = { std::pair<int, int>(numRecvNeighbor, 1), + std::pair<int, int>(numRecvNeighbor, 4), + std::pair<int, int>(numRecvNeighbor, 6) }; + const std::vector<std::pair<int, int>> expectedEdgeNodesXtoZSend = { std::pair<int, int>(numSendNeighbor, 0), + std::pair<int, int>(numSendNeighbor, 5), + std::pair<int, int>(numSendNeighbor, 4) }; EXPECT_THAT(para->parH[level]->edgeNodesXtoZ.size(), testing::Eq(expectedEdgeNodesXtoZRecv.size())); EXPECT_TRUE(compareEdgeNodesRecv(para->parH[level]->edgeNodesXtoZ, expectedEdgeNodesXtoZRecv)) diff --git a/src/gpu/VirtualFluids_GPU/Parameter/Parameter.cpp b/src/gpu/VirtualFluids_GPU/Parameter/Parameter.cpp index dc7d5cb07e573003bfebfa7ef327dddb1f9d4aa4..4123f39f351c4bf41d536bff0d1deea3fbe6e2aa 100644 --- 
a/src/gpu/VirtualFluids_GPU/Parameter/Parameter.cpp +++ b/src/gpu/VirtualFluids_GPU/Parameter/Parameter.cpp @@ -883,6 +883,10 @@ void Parameter::setPressOutZ(unsigned int PressOutZ) { ic.PressOutZ = PressOutZ; } +void Parameter::setOutflowPressureCorrectionFactor(real pressBCrhoCorrectionFactor) +{ + ic.outflowPressureCorrectionFactor = pressBCrhoCorrectionFactor; +} void Parameter::setMaxDev(int maxdev) { ic.maxdev = maxdev; @@ -1906,6 +1910,10 @@ unsigned int Parameter::getPressOutZ() { return ic.PressOutZ; } +real Parameter::getOutflowPressureCorrectionFactor() +{ + return ic.outflowPressureCorrectionFactor; +} int Parameter::getMaxDev() { return ic.maxdev; diff --git a/src/gpu/VirtualFluids_GPU/Parameter/Parameter.h b/src/gpu/VirtualFluids_GPU/Parameter/Parameter.h index a397948ef8fe642df377681404e870b90aac100a..aff100584abef9797a0f72c11319be4719503d92 100644 --- a/src/gpu/VirtualFluids_GPU/Parameter/Parameter.h +++ b/src/gpu/VirtualFluids_GPU/Parameter/Parameter.h @@ -54,6 +54,8 @@ class ConfigurationFile; } class CudaStreamManager; +class VelocityReader; + //! \struct LBMSimulationParameter //! \brief struct holds and manages the LB-parameter of the simulation //! \brief For this purpose it holds structures and pointer for host and device data, respectively. @@ -218,16 +220,16 @@ struct LBMSimulationParameter { OffsetFC offFCBulk; unsigned int mem_size_kCF_off; unsigned int mem_size_kFC_off; - - // BC's//////////////////// + //! \brief stores the boundary condition data QforBoundaryConditions noSlipBC, velocityBC, outflowBC, slipBC, stressBC, pressureBC; //! 
\brief number of lattice nodes for the boundary conditions - unsigned int numberOfNoSlipBCnodesRead, numberOfVeloBCnodesRead, numberOfOutflowBCnodesRead, numberOfSlipBCnodesRead, numberOfStressBCnodesRead, numberOfPressureBCnodesRead; + unsigned int numberOfNoSlipBCnodesRead, numberOfVeloBCnodesRead, numberOfOutflowBCnodesRead, numberOfSlipBCnodesRead, numberOfStressBCnodesRead, numberOfPressureBCnodesRead, numberOfPrecursorBCnodesRead; QforBoundaryConditions QpressX0, QpressX1, QpressY0, QpressY1, QpressZ0, QpressZ1; // DEPRECATED QforBoundaryConditions propellerBC; QforBoundaryConditions geometryBC; + QforPrecursorBoundaryConditions precursorBC; QforBoundaryConditions geometryBCnormalX, geometryBCnormalY, geometryBCnormalZ; QforBoundaryConditions inflowBCnormalX, inflowBCnormalY, inflowBCnormalZ; QforBoundaryConditions outflowBCnormalX, outflowBCnormalY, outflowBCnormalZ; @@ -235,6 +237,8 @@ struct LBMSimulationParameter { unsigned int kInletQread, kOutletQread; // DEPRECATED WallModelParameters wallModel; + std::vector<SPtr<VelocityReader>> velocityReader; + real outflowPressureCorrectionFactor; // testRoundoffError Distributions27 kDistTestRE; @@ -468,6 +472,7 @@ public: void setpressBcPos(std::string pressBcPos); void setpressBcQs(std::string pressBcQs); void setpressBcValue(std::string pressBcValue); + void setOutflowPressureCorrectionFactor(real correctionFactor); void setpressBcValues(std::string pressBcValues); void setvelBcQs(std::string velBcQs); void setvelBcValues(std::string velBcValues); @@ -524,7 +529,6 @@ public: void setUseWale(bool useWale); void setTurbulenceModel(TurbulenceModel turbulenceModel); void setUseTurbulentViscosity(bool useTurbulentViscosity); - void setUseAMD(bool useAMD); void setSGSConstant(real SGSConstant); void setHasWallModelMonitor(bool hasWallModelMonitor); void setUseInitNeq(bool useInitNeq); @@ -850,6 +854,7 @@ public: std::string getOutflowBoundaryNormalX(); std::string getOutflowBoundaryNormalY(); std::string 
getOutflowBoundaryNormalZ(); + real getOutflowPressureCorrectionFactor(); // CUDA random number curandState *getRandomState(); // Kernel diff --git a/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/ActuatorLine.cu b/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/ActuatorLine.cu index 71897bd21ea4fb299d3cc0ffa385506d4503f360..60dd7d3b581a102ad7b9c77f9eb6fb9a56f64bd7 100644 --- a/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/ActuatorLine.cu +++ b/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/ActuatorLine.cu @@ -188,9 +188,9 @@ __global__ void applyBodyForces(real* gridCoordsX, real* gridCoordsY, real* grid } } - atomicAdd(&gridForcesX[gridIndex], gridForceX_RF); - atomicAdd(&gridForcesY[gridIndex], gridForceY_RF); - atomicAdd(&gridForcesZ[gridIndex], gridForceZ_RF); + gridForcesX[gridIndex] = gridForceX_RF; + gridForcesY[gridIndex] = gridForceY_RF; + gridForcesZ[gridIndex] = gridForceZ_RF; } @@ -210,7 +210,7 @@ void ActuatorLine::interact(Parameter* para, CudaMemoryManager* cudaMemoryManage { if (level != this->level) return; - cudaMemoryManager->cudaCopyBladeCoordsHtoD(this); + if(useHostArrays) cudaMemoryManager->cudaCopyBladeCoordsHtoD(this); vf::cuda::CudaGrid bladeGrid = vf::cuda::CudaGrid(para->getParH(level)->numberofthreads, this->nNodes); @@ -225,11 +225,11 @@ void ActuatorLine::interact(Parameter* para, CudaMemoryManager* cudaMemoryManage this->turbinePosX, this->turbinePosY, this->turbinePosZ, this->bladeIndicesD, para->getVelocityRatio(), this->invDeltaX); - cudaMemoryManager->cudaCopyBladeVelocitiesDtoH(this); + if(useHostArrays) cudaMemoryManager->cudaCopyBladeVelocitiesDtoH(this); this->calcBladeForces(); - cudaMemoryManager->cudaCopyBladeForcesHtoD(this); + if(useHostArrays) cudaMemoryManager->cudaCopyBladeForcesHtoD(this); vf::cuda::CudaGrid sphereGrid = vf::cuda::CudaGrid(para->getParH(level)->numberofthreads, this->nIndices); @@ -369,6 +369,14 @@ void ActuatorLine::initBladeIndices(Parameter* para, CudaMemoryManager* cudaMemo } 
cudaMemoryManager->cudaCopyBladeIndicesHtoD(this); } +void ActuatorLine::setPreInitBladeRadii(real* _bladeRadii) +{ + this->bladeRadiiPreInit = (real*) malloc(this->nBladeNodes*sizeof(real)); + for(uint node=0; node<this->nBladeNodes; node++) + { + this->bladeRadiiPreInit[node] = _bladeRadii[node]; + } +} void ActuatorLine::initBoundingSphere(Parameter* para, CudaMemoryManager* cudaMemoryManager) { @@ -420,4 +428,27 @@ void ActuatorLine::setBladeForces(real* _bladeForcesX, real* _bladeForcesY, real this->bladeForcesYH[node] = _bladeForcesY[node]; this->bladeForcesZH[node] = _bladeForcesZ[node]; } +} +void ActuatorLine::setBladeCoordsD(real* _bladeCoordsX, real* _bladeCoordsY, real* _bladeCoordsZ) +{ + throw std::runtime_error("not implemented"); + this->bladeCoordsXD = _bladeCoordsX; + this->bladeCoordsYD = _bladeCoordsY; + this->bladeCoordsZD = _bladeCoordsZ; +} + +void ActuatorLine::setBladeVelocitiesD(real* _bladeVelocitiesX, real* _bladeVelocitiesY, real* _bladeVelocitiesZ) +{ + throw std::runtime_error("not implemented"); + this->bladeVelocitiesXD = _bladeVelocitiesX; + this->bladeVelocitiesYD = _bladeVelocitiesY; + this->bladeVelocitiesZD = _bladeVelocitiesZ; +} + +void ActuatorLine::setBladeForcesD(real* _bladeForcesX, real* _bladeForcesY, real* _bladeForcesZ) +{ + throw std::runtime_error("not implemented"); + this->bladeForcesXD = _bladeForcesX; /* force pointers go to the force members, not the coordinate members */ + this->bladeForcesYD = _bladeForcesY; + this->bladeForcesZD = _bladeForcesZ; } \ No newline at end of file diff --git a/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/ActuatorLine.h b/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/ActuatorLine.h index b44c89c5020eb206baa3bba1994b1e45f760c3bb..a441387512cc86e83453d9a4689d541b17dfde0f 100644 --- a/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/ActuatorLine.h +++ b/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/ActuatorLine.h @@ -22,7 +22,8 @@ public: const real _diameter, int _level, const real _deltaT, - const real _deltaX + const real _deltaX, + const bool 
_useHostArrays ) : nBlades(_nBlades), density(_density), nBladeNodes(_nBladeNodes), @@ -30,6 +31,7 @@ public: turbinePosX(_turbinePosX), turbinePosY(_turbinePosY), turbinePosZ(_turbinePosZ), diameter(_diameter), level(_level), + useHostArrays(_useHostArrays), PreCollisionInteractor() { this->deltaT = _deltaT*exp2(-this->level); @@ -58,6 +60,8 @@ public: real getAzimuth(){ return this->azimuth; }; real getYaw(){ return this->yaw; }; real getDensity(){ return this->density; }; + real getDeltaT(){ return this->deltaT; }; + real getDeltaX(){ return this->deltaX; }; real getPositionX(){ return this->turbinePosX; }; real getPositionY(){ return this->turbinePosY; }; real getPositionZ(){ return this->turbinePosZ; }; @@ -72,12 +76,27 @@ public: real* getBladeForcesY(){ return this->bladeForcesYH; }; real* getBladeForcesZ(){ return this->bladeForcesZH; }; + real* getBladeRadiiD(){ return this->bladeRadiiD; }; + real* getBladeCoordsXD(){ return this->bladeCoordsXD; }; + real* getBladeCoordsYD(){ return this->bladeCoordsYD; }; + real* getBladeCoordsZD(){ return this->bladeCoordsZD; }; + real* getBladeVelocitiesXD(){ return this->bladeVelocitiesXD; }; + real* getBladeVelocitiesYD(){ return this->bladeVelocitiesYD; }; + real* getBladeVelocitiesZD(){ return this->bladeVelocitiesZD; }; + real* getBladeForcesXD(){ return this->bladeForcesXD; }; + real* getBladeForcesYD(){ return this->bladeForcesYD; }; + real* getBladeForcesZD(){ return this->bladeForcesZD; }; + void setOmega(real _omega){ this->omega = _omega; }; void setAzimuth(real _azimuth){ this->azimuth = _azimuth; }; void setYaw(real _yaw){ this->yaw = _yaw; }; + void setPreInitBladeRadii(real* _bladeRadii); void setBladeCoords(real* _bladeCoordsX, real* _bladeCoordsY, real* _bladeCoordsZ); void setBladeVelocities(real* _bladeVelocitiesX, real* _bladeVelocitiesY, real* _bladeVelocitiesZ); void setBladeForces(real* _bladeForcesX, real* _bladeForcesY, real* _bladeForcesZ); + void setBladeCoordsD(real* _bladeCoordsX, real* 
_bladeCoordsY, real* _bladeCoordsZ); + void setBladeVelocitiesD(real* _bladeVelocitiesX, real* _bladeVelocitiesY, real* _bladeVelocitiesZ); + void setBladeForcesD(real* _bladeForcesX, real* _bladeForcesY, real* _bladeForcesZ); virtual void calcBladeForces(); private: @@ -92,6 +111,7 @@ private: void calcForcesEllipticWing(); public: + real* bladeRadiiPreInit; real* bladeRadiiH; real* bladeRadiiD; real* bladeCoordsXH, * bladeCoordsYH, * bladeCoordsZH; @@ -106,6 +126,7 @@ public: uint* boundingSphereIndicesD; private: + const bool useHostArrays; const real density; real turbinePosX, turbinePosY, turbinePosZ; real omega, azimuth, yaw, deltaT, deltaX, invDeltaX, forceRatio, factorGaussian, invEpsilonSqrd; diff --git a/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/PrecursorWriter.cu b/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/PrecursorWriter.cu new file mode 100644 index 0000000000000000000000000000000000000000..f2be9567b450f42627ee9647727b321a89baf387 --- /dev/null +++ b/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/PrecursorWriter.cu @@ -0,0 +1,300 @@ +#include "PrecursorWriter.h" +#include "basics/writer/WbWriterVtkXmlImageBinary.h" + +#include <cuda.h> +#include <cuda_runtime.h> +#include <helper_cuda.h> +#include <cuda/CudaGrid.h> +#include "Kernel/Utilities/DistributionHelper.cuh" + +#include <Core/StringUtilities/StringUtil.h> + +#include "Parameter/Parameter.h" +#include "DataStructureInitializer/GridProvider.h" +#include "GPU/CudaMemoryManager.h" + +using namespace vf::lbm::dir; + + + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +//TODO check everything for multiple level +void index1d(int& idx, int y, int z, int ny, int nz) +{ + idx = y+ny*z; +} + +void index2d(int idx, int& y, int& z, int ny, int nz) +{ + z = idx/ny; + y = idx-ny*z; +} + +__inline__ __host__ __device__ uint lIndex(const uint component, const uint node, const uint 
timestep, const uint nComponents, const uint nNodes) +{ + return node+nNodes*(component+timestep*nComponents); +} + +__inline__ __host__ __device__ uint lIndex(const uint component, const uint node, const uint nNodes) +{ + return node+component*nNodes; +} + + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +__global__ void fillArrayVelocities(const uint nNodes, + uint* indices, + real *precursorData, + real *vx, + real *vy, + real *vz, + real velocityRatio) + + +{ + const uint node = vf::gpu::getNodeIndex(); + + if(node>=nNodes) return; + + precursorData[lIndex(0u, node, nNodes)] = vx[indices[node]]*velocityRatio; + precursorData[lIndex(1u, node, nNodes)] = vy[indices[node]]*velocityRatio; + precursorData[lIndex(2u, node, nNodes)] = vz[indices[node]]*velocityRatio; +} + + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +__global__ void fillArrayDistributions( uint nNodes, uint* indices, + real* precursorData, + real* distributions, + uint* neighborX, uint* neighborY, uint* neighborZ, + bool isEvenTimestep, + unsigned long numberOfLBnodes) +{ + const uint node = vf::gpu::getNodeIndex(); + + if(node>=nNodes) return; + + Distributions27 dist = vf::gpu::getDistributionReferences27(distributions, numberOfLBnodes, isEvenTimestep); + + //////////////////////////////////////////////////////////////////////////////// + // ! - Set neighbor indices (necessary for indirect addressing) + uint k_000 = indices[node]; + // uint k_M00 = neighborX[k_000]; + uint k_0M0 = neighborY[k_000]; + uint k_00M = neighborZ[k_000]; + // uint k_MM0 = neighborY[k_M00]; + // uint k_M0M = neighborZ[k_M00]; + uint k_0MM = neighborZ[k_0M0]; + // uint k_MMM = neighborZ[k_MM0]; + + //////////////////////////////////////////////////////////////////////////////////// + //! 
- Get local distributions in PX directions + //! + precursorData[lIndex(PrecP00, node, nNodes)] = (dist.f[DIR_P00])[k_000]; + precursorData[lIndex(PrecPP0, node, nNodes)] = (dist.f[DIR_PP0])[k_000]; + precursorData[lIndex(PrecPM0, node, nNodes)] = (dist.f[DIR_PM0])[k_0M0]; + precursorData[lIndex(PrecP0P, node, nNodes)] = (dist.f[DIR_P0P])[k_000]; + precursorData[lIndex(PrecP0M, node, nNodes)] = (dist.f[DIR_P0M])[k_00M]; + precursorData[lIndex(PrecPPP, node, nNodes)] = (dist.f[DIR_PPP])[k_000]; + precursorData[lIndex(PrecPMP, node, nNodes)] = (dist.f[DIR_PMP])[k_0M0]; + precursorData[lIndex(PrecPPM, node, nNodes)] = (dist.f[DIR_PPM])[k_00M]; + precursorData[lIndex(PrecPMM, node, nNodes)] = (dist.f[DIR_PMM])[k_0MM]; +} + + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +void PrecursorWriter::init(Parameter* para, GridProvider* gridProvider, CudaMemoryManager* cudaManager) +{ + precursorStructs.resize(para->getMaxLevel()+1); + for(int level=0; level<=para->getMaxLevel(); level++) + { + + real dx = abs(para->getParH(level)->coordinateX[1]-para->getParH(level)->coordinateX[para->getParH(level)->neighborX[1]]); + int maxPoints = (int((yMax-yMin)/dx)+1)* (int((zMax-zMin)/dx)+1); + + real lowestY, lowestZ, highestY, highestZ; + + lowestY = para->getParH(level)->coordinateY[para->getParH(level)->numberOfNodes-1]; + highestY = para->getParH(level)->coordinateY[1]; + + lowestZ = para->getParH(level)->coordinateZ[para->getParH(level)->numberOfNodes-1]; + highestZ = para->getParH(level)->coordinateZ[1]; + + std::vector<uint> indicesOnGrid; + std::vector<int> indicesOnPlane; + std::vector<real> coordY, coordZ; + + for(uint j=1; j<para->getParH(level)->numberOfNodes; j++ ) + { + real pointCoordX = para->getParH(level)->coordinateX[j]; + real pointCoordY = para->getParH(level)->coordinateY[j]; + real pointCoordZ = para->getParH(level)->coordinateZ[j]; + if( 
pointCoordX < (dx+xPos) && pointCoordX >= xPos && + pointCoordY<=yMax && pointCoordY>=yMin && + pointCoordZ<=zMax && pointCoordZ>=zMin) + { + highestY = max(highestY, pointCoordY); + highestZ = max(highestZ, pointCoordZ); + + lowestY = min(lowestY, pointCoordY); + lowestZ = min(lowestZ, pointCoordZ); + indicesOnGrid.push_back(j); + coordY.push_back(pointCoordY); + coordZ.push_back(pointCoordZ); + } + } + assert("PrecursorWriter did not find any points on the grid"&& indicesOnGrid.size()!=0); /* must trip when NO node lies on the plane, not when nodes were found */ + int ny = int((highestY-lowestY)/dx)+1; + int nz = int((highestZ-lowestZ)/dx)+1; + + for(uint i=0;i<indicesOnGrid.size(); i++) + { + int idxY = int((coordY[i]-lowestY)/dx); + int idxZ = int((coordZ[i]-lowestZ)/dx); + int idx; + index1d(idx, idxY, idxZ, ny, nz); + indicesOnPlane.push_back(idx); + // printf("idx %d, idy %d, idz %d, ny %d, nz %d\n", idx, idxY, idxZ, ny, nz); + } + + precursorStructs[level] = SPtr<PrecursorStruct>(new PrecursorStruct); + precursorStructs[level]->nPoints = (uint)indicesOnGrid.size(); + precursorStructs[level]->indicesOnPlane = (int*) malloc(precursorStructs[level]->nPoints*sizeof(int)); + precursorStructs[level]->spacing = makeUbTuple(dx, dx, tSave*para->getTimeRatio()); + precursorStructs[level]->origin = makeUbTuple(lowestY, lowestZ); + precursorStructs[level]->extent = makeUbTuple(0, ny-1, 0, nz-1); + precursorStructs[level]->nPointsInPlane = ny*nz; + precursorStructs[level]->timestepsPerFile = min(para->getlimitOfNodesForVTK()/(ny*nz), maxtimestepsPerFile); + precursorStructs[level]->filesWritten = 0; + precursorStructs[level]->timestepsBuffered = 0; + + switch (outputVariable) + { + case OutputVariable::Velocities: + precursorStructs[level]->nQuantities = 3; + break; + case OutputVariable::Distributions: + precursorStructs[level]->nQuantities = 9; + break; + + default: + break; + } + + // printf("points %zu points on plane %zu \n", indicesOnGrid.size(), indicesOnPlane.size()); + + cudaManager->cudaAllocPrecursorWriter(this, level); + + 
std::copy(indicesOnGrid.begin(), indicesOnGrid.end(), precursorStructs[level]->indicesH); + std::copy(indicesOnPlane.begin(), indicesOnPlane.end(), precursorStructs[level]->indicesOnPlane); + + cudaManager->cudaCopyPrecursorWriterIndicesHtoD(this, level); + } +} + + +void PrecursorWriter::interact(Parameter* para, CudaMemoryManager* cudaManager, int level, uint t) +{ + if(t>tStartOut ? ((t-tStartOut) % tSave)==0 : false) + { + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(para->getParH(level)->numberofthreads, precursorStructs[level]->nPoints); + + if(this->outputVariable==OutputVariable::Velocities) + { + fillArrayVelocities<<<grid.grid, grid.threads>>>( precursorStructs[level]->nPoints, precursorStructs[level]->indicesD, + precursorStructs[level]->bufferD, + para->getParD(level)->velocityX, para->getParD(level)->velocityY, para->getParD(level)->velocityZ, + para->getVelocityRatio()); + getLastCudaError("In PrecursorWriter::interact fillArrayVelocities execution failed"); + } + else if(this->outputVariable==OutputVariable::Distributions) + { + fillArrayDistributions<<<grid.grid, grid.threads>>>(precursorStructs[level]->nPoints, precursorStructs[level]->indicesD, + precursorStructs[level]->bufferD, + para->getParD(level)->distributions.f[0], + para->getParD(level)->neighborX, para->getParD(level)->neighborY, para->getParD(level)->neighborZ, + para->getEvenOrOdd(level), para->getParD(level)->numberOfNodes); + getLastCudaError("In PrecursorWriter::interact fillArrayDistributions execution failed"); + } + cudaManager->cudaCopyPrecursorWriterOutputVariablesDtoH(this, level); + + // switch device buffer and data pointer so precursor data is gathered in buffer and copied from bufferD to bufferH + real *tmp = precursorStructs[level]->bufferD; + precursorStructs[level]->bufferD = precursorStructs[level]->dataD; + precursorStructs[level]->dataD = tmp; + + precursorStructs[level]->timestepsBuffered++; + + if(precursorStructs[level]->timestepsBuffered >= 
precursorStructs[level]->timestepsPerFile) + { + // switch host buffer and data pointer so precursor data is copied in buffer and written from data + + tmp = precursorStructs[level]->bufferH; + precursorStructs[level]->bufferH = precursorStructs[level]->dataH; + precursorStructs[level]->dataH = tmp; + + writeFuture.wait(); + writeFuture = std::async(std::launch::async, [this](Parameter* para, uint level, uint timesteps){ this->write(para, level, timesteps); }, para, level, precursorStructs[level]->timestepsBuffered); + precursorStructs[level]->timestepsBuffered = 0; + } + } +} + + +void PrecursorWriter::free(Parameter* para, CudaMemoryManager* cudaManager) +{ + writeFuture.wait(); + for(int level=0; level<=para->getMaxLevel(); level++) + { + if(getPrecursorStruct(level)->timestepsBuffered>0) + write(para, level, getPrecursorStruct(level)->timestepsBuffered); + + cudaManager->cudaFreePrecursorWriter(this, level); + } +} + + +void PrecursorWriter::write(Parameter* para, int level, uint timestepsBuffered) +{ + std::string fname = this->makeFileName(fileName, level, para->getMyProcessID(), precursorStructs[level]->filesWritten) + getWriter()->getFileExtension(); + std::string wholeName = outputPath + "/" + fname; + + uint nPointsInPlane = precursorStructs[level]->nPointsInPlane; + + int startTime = precursorStructs[level]->filesWritten*precursorStructs[level]->timestepsPerFile; + + // printf("points in plane %d, total timesteps %d, ntimesteps %d \n", nPointsInPlane, nTotalTimesteps, nTimesteps); + + UbTupleInt6 extent = makeUbTuple( val<1>(precursorStructs[level]->extent), val<2>(precursorStructs[level]->extent), + val<3>(precursorStructs[level]->extent), val<4>(precursorStructs[level]->extent), + startTime, startTime+(int)timestepsBuffered-1); + + UbTupleFloat3 origin = makeUbTuple( val<1>(precursorStructs[level]->origin), val<2>(precursorStructs[level]->origin), 0.f); + + std::vector<std::vector<double>> nodedata; + + for(uint quant=0; 
quant<precursorStructs[level]->nQuantities; quant++) + { + std::vector<double> doubleArr(nPointsInPlane*timestepsBuffered, NAN); + for( uint timestep=0; timestep<timestepsBuffered; timestep++) + { + for (uint pos=0; pos < precursorStructs[level]->nPoints; pos++) + { + int indexOnPlane = precursorStructs[level]->indicesOnPlane[pos]+timestep*nPointsInPlane; + doubleArr[indexOnPlane] = double(precursorStructs[level]->dataH[lIndex(quant, pos, timestep, precursorStructs[level]->nQuantities, precursorStructs[level]->nPoints)]); + } + } + nodedata.push_back(doubleArr); + } + + std::vector<std::vector<double>> celldata; + getWriter()->writeData(wholeName, nodedatanames, celldatanames, nodedata, celldata, extent, origin, precursorStructs[level]->spacing, extent); + precursorStructs[level]->filesWritten++; +} + +std::string PrecursorWriter::makeFileName(std::string fileName, int level, int id, uint filesWritten) +{ + return fileName + "_lev_" + StringUtil::toString<int>(level) + + "_ID_" + StringUtil::toString<int>(id) + + "_File_" + StringUtil::toString<int>(filesWritten); +} \ No newline at end of file diff --git a/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/PrecursorWriter.h b/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/PrecursorWriter.h new file mode 100644 index 0000000000000000000000000000000000000000..68f69d8122c33f7283783cf002596e0b03d31513 --- /dev/null +++ b/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/PrecursorWriter.h @@ -0,0 +1,119 @@ +#ifndef PRECURSORPROBE_H_ +#define PRECURSORPROBE_H_ + +#include "PreCollisionInteractor.h" +#include "WbWriterVtkXmlImageBinary.h" +#include "LBM/LB.h" +#include <string> +#include <vector> +#include <future> +#include "PointerDefinitions.h" + + +class Parameter; +class CudaMemoryManager; +class GridProvider; + +enum class OutputVariable { + //! - Velocities + Velocities, + //! 
- Distributions + Distributions +}; + +static constexpr uint PrecP00 = 0; +static constexpr uint PrecPP0 = 1; +static constexpr uint PrecPM0 = 2; +static constexpr uint PrecP0P = 3; +static constexpr uint PrecP0M = 4; +static constexpr uint PrecPPP = 5; +static constexpr uint PrecPMP = 6; +static constexpr uint PrecPPM = 7; +static constexpr uint PrecPMM = 8; + +struct PrecursorStruct +{ + uint nPoints, nPointsInPlane, timestepsPerFile, filesWritten, timestepsBuffered; + uint *indicesH, *indicesD; + real *dataH, *dataD; + real *bufferH, *bufferD; + uint nQuantities; + UbTupleInt4 extent; + UbTupleFloat2 origin; + UbTupleFloat3 spacing; + int* indicesOnPlane; + cudaStream_t stream; +}; + +class PrecursorWriter : public PreCollisionInteractor +{ +public: + PrecursorWriter( + const std::string _fileName, + const std::string _outputPath, + real _xPos, + real _yMin, real _yMax, + real _zMin, real _zMax, + uint _tStartOut, + uint _tSave, + OutputVariable _outputVariable, + uint _maxTimestepsPerFile=uint(1e4) + ): + fileName(_fileName), + outputPath(_outputPath), + xPos(_xPos), + yMin(_yMin), + yMax(_yMax), + zMin(_zMin), + zMax(_zMax), + tStartOut(_tStartOut), + tSave(_tSave), + outputVariable(_outputVariable), + maxtimestepsPerFile(_maxTimestepsPerFile) + { + nodedatanames = determineNodeDataNames(); + writeFuture = std::async([](){}); + }; + + void init(Parameter* para, GridProvider* gridProvider, CudaMemoryManager* cudaManager) override; + void interact(Parameter* para, CudaMemoryManager* cudaManager, int level, uint t) override; + void free(Parameter* para, CudaMemoryManager* cudaManager) override; + + OutputVariable getOutputVariable(){ return this->outputVariable; } + + SPtr<PrecursorStruct> getPrecursorStruct(int level){return precursorStructs[level];} + static std::string makeFileName(std::string fileName, int level, int id, uint part); + +private: + WbWriterVtkXmlImageBinary* getWriter(){ return WbWriterVtkXmlImageBinary::getInstance(); }; + void 
write(Parameter* para, int level, uint timestepsBuffered); + + std::vector<std::string> determineNodeDataNames() + { + switch (outputVariable) + { + case OutputVariable::Velocities: + return {"vx", "vy", "vz"}; + break; + case OutputVariable::Distributions: + return {"fP00", "fPP0", "fPM0", "fP0P", "fP0M", "fPPP", "fPMP", "fPPM", "fPMM"}; + break; + + default: + throw std::runtime_error("Invalid OutputVariable for PrecursorWriter"); + break; + } + } + +private: + std::vector<SPtr<PrecursorStruct>> precursorStructs; + std::string fileName, outputPath; + std::vector<std::string> nodedatanames; + std::vector<std::string> celldatanames; + uint tStartOut, tSave, maxtimestepsPerFile; + real xPos, yMin, yMax, zMin, zMax; + OutputVariable outputVariable; + std::future<void> writeFuture; +}; + +#endif //PRECURSORPROBE_H_ \ No newline at end of file diff --git a/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/PlaneProbe.h b/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/PlaneProbe.h index 3440c01020f9b3505be7148024e47373b76648ff..92b1923881526f631cdef1e7c1543d25997cb82f 100644 --- a/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/PlaneProbe.h +++ b/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/PlaneProbe.h @@ -54,7 +54,7 @@ public: ): Probe(_probeName, _outputPath, _tStartAvg, - 0, + _tStartAvg+1, _tAvg, _tStartOut, _tOut, diff --git a/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/Probe.cu b/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/Probe.cu index cc027b07bded01455437e65e08ccdcd51bcf7dc0..0d42c5030363b6c0b3b67db0ed7c75f1ba3ab729 100644 --- a/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/Probe.cu +++ b/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/Probe.cu @@ -329,6 +329,22 @@ void Probe::addStatistic(Statistic variable) } } +std::string Probe::makeParallelFileName(int id, int t) +{ + return this->probeName + "_bin_ID_" + StringUtil::toString<int>(id) + + "_t_" + StringUtil::toString<int>(t) + + 
".vtk"; +} + +std::string Probe::makeGridFileName(int level, int id, int t, uint part) +{ + return this->probeName + "_bin_lev_" + StringUtil::toString<int>(level) + + "_ID_" + StringUtil::toString<int>(id) + + "_Part_" + StringUtil::toString<int>(part) + + "_t_" + StringUtil::toString<int>(t) + + ".vtk"; +} + void Probe::addAllAvailableStatistics() { for( int var=0; var < int(Statistic::LAST); var++) @@ -347,119 +363,75 @@ void Probe::write(Parameter* para, int level, int t) std::vector<std::string> fnames; for (uint i = 1; i <= numberOfParts; i++) { - std::string fname = this->probeName + "_bin_lev_" + StringUtil::toString<int>(level) - + "_ID_" + StringUtil::toString<int>(para->getMyProcessID()) - + "_Part_" + StringUtil::toString<int>(i); - if(!this->outputTimeSeries) fname += "_t_" + StringUtil::toString<int>(t_write); - fname += ".vtk"; - fnames.push_back(fname); - this->fileNamesForCollectionFile.push_back(fname); + this->writeGridFile(para, level, t_write, i); } - this->writeGridFiles(para, level, fnames, t); - - if(level == 0 && !this->outputTimeSeries) this->writeCollectionFile(para, t); + if(level == 0&& !this->outputTimeSeries) this->writeParallelFile(para, t); } -void Probe::writeCollectionFile(Parameter* para, int t) +void Probe::writeParallelFile(Parameter* para, int t) { int t_write = this->fileNameLU ? 
t: t/this->tOut; - std::string filename = this->probeName + "_bin_ID_" + StringUtil::toString<int>(para->getMyProcessID()) - + "_t_" + StringUtil::toString<int>(t_write) - + ".vtk"; - - std::ofstream file; - - file.open(this->outputPath + "/" + filename + ".pvtu" ); - - ////////////////////////////////////////////////////////////////////////// - - file << "<VTKFile type=\"PUnstructuredGrid\" version=\"1.0\" byte_order=\"LittleEndian\" header_type=\"UInt64\">" << std::endl; - file << " <PUnstructuredGrid GhostLevel=\"1\">" << std::endl; - - file << " <PPointData>" << std::endl; - - for(std::string varName: this->getVarNames()) //TODO - { - file << " <DataArray type=\"Float64\" Name=\""<< varName << "\" /> " << std::endl; - } - file << " </PPointData>" << std::endl; + std::string filename = this->outputPath + "/" + this->makeParallelFileName(para->getMyProcessID(), t_write); - file << " <PPoints>" << std::endl; - file << " <PDataArray type=\"Float32\" Name=\"Points\" NumberOfComponents=\"3\"/>" << std::endl; - file << " </PPoints>" << std::endl; + std::vector<std::string> cellNames; - for( auto& fname : this->fileNamesForCollectionFile ) - { - const auto filenameWithoutPath=fname.substr( fname.find_last_of('/') + 1 ); - file << " <Piece Source=\"" << filenameWithoutPath << ".bin.vtu\"/>" << std::endl; - } - - file << " </PUnstructuredGrid>" << std::endl; - file << "</VTKFile>" << std::endl; - - ////////////////////////////////////////////////////////////////////////// - - file.close(); + getWriter()->writeParallelFile(filename, fileNamesForCollectionFile, varNames, cellNames); this->fileNamesForCollectionFile.clear(); } -void Probe::writeGridFiles(Parameter* para, int level, std::vector<std::string>& fnames, int t) +void Probe::writeGridFile(Parameter* para, int level, int t, uint part) { + std::string fname = this->outputPath + "/" + this->makeGridFileName(level, para->getMyProcessID(), t, part); + std::vector< UbTupleFloat3 > nodes; std::vector< std::string > 
nodedatanames = this->getVarNames(); - uint startpos = 0; - uint endpos = 0; - uint sizeOfNodes = 0; std::vector< std::vector< double > > nodedata(nodedatanames.size()); SPtr<ProbeStruct> probeStruct = this->getProbeStruct(level); - for (uint part = 0; part < fnames.size(); part++) - { - startpos = part * para->getlimitOfNodesForVTK(); - uint nDataPoints = this->outputTimeSeries? this->tProbe: probeStruct->nPoints; - sizeOfNodes = min(para->getlimitOfNodesForVTK(), nDataPoints - startpos); - endpos = startpos + sizeOfNodes; + uint startpos = (part-1) * para->getlimitOfNodesForVTK(); + uint sizeOfNodes = min(para->getlimitOfNodesForVTK(), probeStruct->nPoints - startpos); + uint endpos = startpos + sizeOfNodes; - ////////////////////////////////////////////////////////////////////////// - nodes.resize(sizeOfNodes); + ////////////////////////////////////////////////////////////////////////// + nodes.resize(sizeOfNodes); - for (uint pos = startpos; pos < endpos; pos++) - { - nodes[pos-startpos] = makeUbTuple( float(probeStruct->pointCoordsX[pos]), - float(probeStruct->pointCoordsY[pos]), - float(probeStruct->pointCoordsZ[pos])); - } + for (uint pos = startpos; pos < endpos; pos++) + { + nodes[pos-startpos] = makeUbTuple( float(probeStruct->pointCoordsX[pos]), + float(probeStruct->pointCoordsY[pos]), + float(probeStruct->pointCoordsZ[pos])); + } - for( auto it=nodedata.begin(); it!=nodedata.end(); it++) it->resize(sizeOfNodes); + for( auto it=nodedata.begin(); it!=nodedata.end(); it++) it->resize(sizeOfNodes); - for( int var=0; var < int(Statistic::LAST); var++){ - if(this->quantities[var]) - { - Statistic statistic = static_cast<Statistic>(var); - real coeff; + for( int var=0; var < int(Statistic::LAST); var++){ + if(this->quantities[var]) + { + Statistic statistic = static_cast<Statistic>(var); + real coeff; + + std::vector<PostProcessingVariable> postProcessingVariables = this->getPostProcessingVariables(statistic); + uint n_arrs = 
uint(postProcessingVariables.size()); - std::vector<PostProcessingVariable> postProcessingVariables = this->getPostProcessingVariables(statistic); - uint n_arrs = uint(postProcessingVariables.size()); + uint arrOff = probeStruct->arrayOffsetsH[var]; + uint arrLen = probeStruct->nPoints; - uint arrOff = probeStruct->arrayOffsetsH[var]; - uint arrLen = probeStruct->nPoints; + for(uint arr=0; arr<n_arrs; arr++) + { + coeff = postProcessingVariables[arr].conversionFactor(level); - for(uint arr=0; arr<n_arrs; arr++) + for (uint pos = startpos; pos < endpos; pos++) { - coeff = postProcessingVariables[arr].conversionFactor(level); - - for (uint pos = startpos; pos < endpos; pos++) - { - nodedata[arrOff+arr][pos-startpos] = double(probeStruct->quantitiesArrayH[(arrOff+arr)*arrLen+pos]*coeff); - } + nodedata[arrOff+arr][pos-startpos] = double(probeStruct->quantitiesArrayH[(arrOff+arr)*arrLen+pos]*coeff); } } } - WbWriterVtkXmlBinary::getInstance()->writeNodesWithNodeData(this->outputPath + "/" + fnames[part], nodes, nodedatanames, nodedata); } + + this->fileNamesForCollectionFile.push_back(getWriter()->writeNodesWithNodeData(fname, nodes, nodedatanames, nodedata)); } std::vector<std::string> Probe::getVarNames() diff --git a/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/Probe.h b/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/Probe.h index 9cb0bd43e27fb7a28cae9c363ce245fbd9cc5677..4facdca87af55b57db85eeb0686e9e46c0771f47 100644 --- a/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/Probe.h +++ b/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/Probe.h @@ -49,6 +49,7 @@ #include "PreCollisionInteractor/PreCollisionInteractor.h" #include "PointerDefinitions.h" +#include "WbWriterVtkXmlBinary.h" //======================================================================================= //! 
\note How to add new Statistics @@ -152,8 +153,8 @@ public: outputTimeSeries(_outputTimeSeries), PreCollisionInteractor() { - if (_tStartOut<_tStartAvg) throw std::runtime_error("Probe: tStartOut must be larger than tStartAvg!"); - if (_tStartTmpAvg<_tStartAvg) throw std::runtime_error("Probe: tStartTmpAvg must be larger than tStartAvg!"); + if (_tStartOut<_tStartAvg) throw std::runtime_error(_probeName + ": tStartOut must be larger than tStartAvg!"); + if (_tStartTmpAvg<_tStartAvg) throw std::runtime_error(_probeName + ": tStartTmpAvg must be larger than tStartAvg!"); } void init(Parameter* para, GridProvider* gridProvider, CudaMemoryManager* cudaMemoryManager) override; @@ -171,6 +172,8 @@ public: void setFileNameToNOut(){this->fileNameLU = false;} void setTStartTmpAveraging(uint _tStartTmpAveraging){this->tStartTmpAveraging = _tStartTmpAveraging;} +protected: + virtual WbWriterVtkXmlBinary* getWriter(){ return WbWriterVtkXmlBinary::getInstance(); }; real getNondimensionalConversionFactor(int level); private: @@ -188,12 +191,15 @@ private: int level); virtual void calculateQuantities(SPtr<ProbeStruct> probeStruct, Parameter* para, uint t, int level) = 0; - void write(Parameter* para, int level, int t); - void writeCollectionFile(Parameter* para, int t); - void writeGridFiles(Parameter* para, int level, std::vector<std::string >& fnames, int t); + virtual void write(Parameter* para, int level, int t); + virtual void writeParallelFile(Parameter* para, int t); + virtual void writeGridFile(Parameter* para, int level, int t, uint part); + std::vector<std::string> getVarNames(); - -private: + std::string makeGridFileName(int level, int id, int t, uint part); + std::string makeParallelFileName(int id, int t); + +protected: const std::string probeName; const std::string outputPath; @@ -215,7 +221,6 @@ protected: uint tProbe = 0; //!> counter for number of probe evaluations. 
Only used when outputting timeseries - std::function<real(int)> velocityRatio; std::function<real(int)> densityRatio; std::function<real(int)> forceRatio; diff --git a/src/lbm/constants/NumericConstants.h b/src/lbm/constants/NumericConstants.h index 4918d49aaa0431de639ea8ba3320c4fa45e539d4..1a1350604bf23936cfe091a0291a0f3392697315 100644 --- a/src/lbm/constants/NumericConstants.h +++ b/src/lbm/constants/NumericConstants.h @@ -18,6 +18,7 @@ static constexpr double c1o8 = 0.125; static constexpr double c1o9 = 0.111111111111111; static constexpr double c2o9 = 0.222222222222222; static constexpr double c4o9 = 0.444444444444444; +static constexpr double c4o10 = 0.4; static constexpr double c1o10 = 0.1; static constexpr double c1o12 = 0.083333333333333; static constexpr double c1o16 = 0.0625; @@ -99,15 +100,15 @@ static constexpr double c72o1 = 72.; static constexpr double c84o1 = 84.; static constexpr double c88o1 = 88.; static constexpr double c96o1 = 96.; -static constexpr double c100o1 = 10.; -static constexpr double c130o1 = 13.; -static constexpr double c152o1 = 15.; -static constexpr double c166o1 = 16.; -static constexpr double c195o1 = 19.; -static constexpr double c216o1 = 21.; -static constexpr double c264o1 = 26.; -static constexpr double c290o1 = 29.; -static constexpr double c367o1 = 36.; +static constexpr double c100o1 = 100.; +static constexpr double c130o1 = 130.; +static constexpr double c152o1 = 152.; +static constexpr double c166o1 = 166.; +static constexpr double c195o1 = 195.; +static constexpr double c216o1 = 216.; +static constexpr double c264o1 = 264.; +static constexpr double c290o1 = 290.; +static constexpr double c367o1 = 367.; static constexpr double Op0000002 = 0.0000002; static constexpr double c10eM30 = 1e-30; @@ -132,6 +133,7 @@ static constexpr float c1o8 = 0.125f; static constexpr float c1o9 = (1.0f / 9.0f); static constexpr float c2o9 = (2.0f / 9.0f); static constexpr float c4o9 = (4.0f / 9.0f); +static constexpr float c4o10 = 0.4f; 
static constexpr float c1o10 = 0.1f; static constexpr float c1o12 = (1.0f / 12.0f); static constexpr float c1o16 = 0.0625f;
diff --git a/utilities/setup_builder.py b/utilities/setup_builder.py
new file mode 100644
index 0000000000000000000000000000000000000000..821d72ede650937a5fa2873505fd2898164a239e
--- /dev/null
+++ b/utilities/setup_builder.py
@@ -0,0 +1,47 @@
+"""PEP 517 build backend that forwards ``-D`` config settings to setup.py."""
+
+from setuptools import build_meta
+
+
+class builder(build_meta._BuildMetaBackend):
+    """setuptools backend that exposes ``-D...`` config settings as ``cmake_args``."""
+
+    def run_setup(self, setup_script='setup.py'):
+        """Execute ``setup_script`` with ``cmake_args`` injected into its globals."""
+        # Note that we can reuse our build directory between calls
+        # Correctness comes first, then optimization later
+        __file__ = setup_script
+        __name__ = '__main__'
+
+        with build_meta._open_setup_script(__file__) as f:
+            code = f.read().replace(r'\r\n', r'\n')
+        args = locals()
+        # extra_args only exists once add_settings() has run; fall back to
+        # "no extra arguments" if run_setup() is called before that.
+        args["cmake_args"] = getattr(self, "extra_args", {})
+        exec(code, args)
+
+    def add_settings(self, config_settings):
+        """Store the ``-D``-prefixed entries of ``config_settings`` in ``extra_args``."""
+        self.extra_args = dict()
+        print(config_settings)
+        if config_settings:
+            self.extra_args = {k: v for k, v in config_settings.items() if k.startswith("-D")}
+
+    def build_wheel(self, wheel_directory, config_settings=None,
+                    metadata_directory=None):
+        """Record the config settings, then build a wheel via the base backend."""
+        self.add_settings(config_settings)
+        return super().build_wheel(wheel_directory, config_settings, metadata_directory)
+
+    def build_sdist(self, sdist_directory, config_settings=None):
+        """Record the config settings, then build an sdist via the base backend."""
+        self.add_settings(config_settings)
+        # Must delegate to the base class' sdist hook, not build_wheel, so that
+        # a source distribution is what ends up in sdist_directory.
+        return super().build_sdist(sdist_directory, config_settings)
+
+
+build = builder()
+build_wheel = build.build_wheel
+build_sdist = build.build_sdist
\ No newline at end of file